diff --git a/communication/src/main/java/datadog/communication/ddagent/DDAgentFeaturesDiscovery.java b/communication/src/main/java/datadog/communication/ddagent/DDAgentFeaturesDiscovery.java index 55929c73f51..10c1e57efd7 100644 --- a/communication/src/main/java/datadog/communication/ddagent/DDAgentFeaturesDiscovery.java +++ b/communication/src/main/java/datadog/communication/ddagent/DDAgentFeaturesDiscovery.java @@ -101,7 +101,6 @@ private static class State { String version; String telemetryProxyEndpoint; Set peerTags = emptySet(); - String orgPropagationMarker; long lastTimeDiscovered; } @@ -317,8 +316,6 @@ private boolean processInfoResponse(State newState, String response) { ? unmodifiableSet(new HashSet<>((List) peer_tags)) : emptySet(); } - Object opm = map.get("org_prop_marker"); - newState.orgPropagationMarker = (opm instanceof String) ? (String) opm : null; try { newState.state = Strings.sha256(response); } catch (Throwable ex) { @@ -406,10 +403,6 @@ public Set peerTags() { return discoveryState.peerTags; } - public String getOrgPropagationMarker() { - return discoveryState.orgPropagationMarker; - } - public String getMetricsEndpoint() { return discoveryState.metricsEndpoint; } diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java index 60af53815fc..db31498a1ce 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java @@ -76,7 +76,6 @@ public final class GeneralConfig { public static final String TRACER_METRICS_MAX_PENDING = "trace.tracer.metrics.max.pending"; public static final String TRACER_METRICS_IGNORED_RESOURCES = "trace.tracer.metrics.ignored.resources"; - public static final String AZURE_APP_SERVICES = "azure.app.services"; public static final String INTERNAL_EXIT_ON_FAILURE = "trace.internal.exit.on.failure"; diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/AdversarialMetricsBenchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/AdversarialMetricsBenchmark.java index 634dea23358..01ba90097a4 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/AdversarialMetricsBenchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/AdversarialMetricsBenchmark.java @@ -60,7 +60,7 @@ @Fork(value = 1) public class AdversarialMetricsBenchmark { - private ConflatingMetricsAggregator aggregator; + private ClientStatsAggregator aggregator; private CountingHealthMetrics health; @State(Scope.Thread) @@ -72,13 +72,13 @@ public static class ThreadState { public void setup() { this.health = new CountingHealthMetrics(); this.aggregator = - new ConflatingMetricsAggregator( + new ClientStatsAggregator( new WellKnownTags("", "", "", "", "", ""), Collections.emptySet(), - new ConflatingMetricsAggregatorBenchmark.FixedAgentFeaturesDiscovery( + new ClientStatsAggregatorBenchmark.FixedAgentFeaturesDiscovery( Collections.singleton("peer.hostname"), Collections.emptySet()), this.health, - new ConflatingMetricsAggregatorBenchmark.NullSink(), + new ClientStatsAggregatorBenchmark.NullSink(), 2048, 2048, false); diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBenchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ClientStatsAggregatorBenchmark.java similarity index 95% rename from dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBenchmark.java rename to dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ClientStatsAggregatorBenchmark.java index b9a2f7f8c54..b9d72eaf3ab 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBenchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ClientStatsAggregatorBenchmark.java @@ -34,12 +34,12 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(MICROSECONDS) @Fork(value = 1) -public class ConflatingMetricsAggregatorBenchmark { +public class ClientStatsAggregatorBenchmark { private final DDAgentFeaturesDiscovery featuresDiscovery = new FixedAgentFeaturesDiscovery( Collections.singleton("peer.hostname"), Collections.emptySet()); - private final ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + private final ClientStatsAggregator aggregator = + new ClientStatsAggregator( new WellKnownTags("", "", "", "", "", ""), Collections.emptySet(), featuresDiscovery, diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDDSpanBenchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ClientStatsAggregatorDDSpanBenchmark.java similarity index 87% rename from dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDDSpanBenchmark.java rename to dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ClientStatsAggregatorDDSpanBenchmark.java index 89059857d9c..0453b8888db 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDDSpanBenchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/ClientStatsAggregatorDDSpanBenchmark.java @@ -28,8 +28,8 @@ import org.openjdk.jmh.infra.Blackhole; /** - * Parallels {@link ConflatingMetricsAggregatorBenchmark} but uses real {@link DDSpan} instances - * instead of the lightweight {@code SimpleSpan} mock, so the JIT exercises the production {@link + * Parallels {@link ClientStatsAggregatorBenchmark} but uses real {@link DDSpan} instances instead + * of the lightweight {@code SimpleSpan} mock, so the JIT exercises the production {@link * CoreSpan#isKind} path (cached span.kind ordinal + bit-test) rather than the groovy mock's * dispatch. * @@ -50,21 +50,21 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(MICROSECONDS) @Fork(value = 1) -public class ConflatingMetricsAggregatorDDSpanBenchmark { +public class ClientStatsAggregatorDDSpanBenchmark { private static final CoreTracer TRACER = CoreTracer.builder().writer(new NoopWriter()).strictTraceWrites(false).build(); private final DDAgentFeaturesDiscovery featuresDiscovery = - new ConflatingMetricsAggregatorBenchmark.FixedAgentFeaturesDiscovery( + new ClientStatsAggregatorBenchmark.FixedAgentFeaturesDiscovery( Collections.singleton("peer.hostname"), Collections.emptySet()); - private final ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + private final ClientStatsAggregator aggregator = + new ClientStatsAggregator( new WellKnownTags("", "", "", "", "", ""), Collections.emptySet(), featuresDiscovery, HealthMetrics.NO_OP, - new ConflatingMetricsAggregatorBenchmark.NullSink(), + new ClientStatsAggregatorBenchmark.NullSink(), 2048, 2048, false); diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityPeerMetricsBenchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityPeerMetricsBenchmark.java index 67caaca6ced..8fc45b4acd8 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityPeerMetricsBenchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityPeerMetricsBenchmark.java @@ -50,7 +50,7 @@ @Fork(value = 1) public class HighCardinalityPeerMetricsBenchmark { - private ConflatingMetricsAggregator aggregator; + private ClientStatsAggregator aggregator; private CountingHealthMetrics health; @State(Scope.Thread) @@ -62,13 +62,13 @@ public static class ThreadState { public void setup() { this.health = new CountingHealthMetrics(); this.aggregator = - new ConflatingMetricsAggregator( + new ClientStatsAggregator( new WellKnownTags("", "", "", "", "", ""), Collections.emptySet(), - new ConflatingMetricsAggregatorBenchmark.FixedAgentFeaturesDiscovery( + new ClientStatsAggregatorBenchmark.FixedAgentFeaturesDiscovery( Collections.singleton("peer.hostname"), Collections.emptySet()), this.health, - new ConflatingMetricsAggregatorBenchmark.NullSink(), + new ClientStatsAggregatorBenchmark.NullSink(), 2048, 2048, false); diff --git a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityResourceMetricsBenchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityResourceMetricsBenchmark.java index 5ae8c3a715f..90c1f088935 100644 --- a/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityResourceMetricsBenchmark.java +++ b/dd-trace-core/src/jmh/java/datadog/trace/common/metrics/HighCardinalityResourceMetricsBenchmark.java @@ -46,7 +46,7 @@ @Fork(value = 1) public class HighCardinalityResourceMetricsBenchmark { - private ConflatingMetricsAggregator aggregator; + private ClientStatsAggregator aggregator; private CountingHealthMetrics health; @State(Scope.Thread) @@ -58,13 +58,13 @@ public static class ThreadState { public void setup() { this.health = new CountingHealthMetrics(); this.aggregator = - new ConflatingMetricsAggregator( + new ClientStatsAggregator( new WellKnownTags("", "", "", "", "", ""), Collections.emptySet(), - new ConflatingMetricsAggregatorBenchmark.FixedAgentFeaturesDiscovery( + new ClientStatsAggregatorBenchmark.FixedAgentFeaturesDiscovery( Collections.singleton("peer.hostname"), Collections.emptySet()), this.health, - new ConflatingMetricsAggregatorBenchmark.NullSink(), + new ClientStatsAggregatorBenchmark.NullSink(), 2048, 2048, false); diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java index 5bc985491de..fb9b37a0da4 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java @@ -1,131 +1,98 @@ package datadog.trace.common.metrics; -import static datadog.trace.bootstrap.instrumentation.api.UTF8BytesString.EMPTY; - import datadog.metrics.api.Histogram; -import datadog.trace.api.Pair; -import datadog.trace.api.cache.DDCache; -import datadog.trace.api.cache.DDCaches; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; +import datadog.trace.core.monitor.HealthMetrics; import datadog.trace.util.Hashtable; import datadog.trace.util.LongHashingUtils; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.function.Function; import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * Hashtable entry for the consumer-side aggregator. Holds the UTF8-encoded label fields that {@link - * SerializingMetricWriter} writes to the wire plus the mutable counter/histogram state for the key. - * - *

{@link #matches(SpanSnapshot)} compares the entry's stored UTF8 forms against the snapshot's - * raw {@code CharSequence}/{@code String}/{@code String[]} fields via content-equality, so {@code - * String} vs {@code UTF8BytesString} mixing on the same logical key collapses into one entry - * instead of splitting. - * - *

The static UTF8 caches that used to live on {@code MetricKey} and {@code - * ConflatingMetricsAggregator} are consolidated here. - * - *

Deliberate cohesion. This class concentrates five responsibilities -- the static UTF8 - * caches, the canonicalized label fields, the raw {@code peerTagNames}/{@code peerTagValues} arrays - * used by {@link #matches}, the pre-encoded {@code peerTags} list used by the serializer, and the - * mutable counter/histogram aggregate state -- on a single object. The prior design split the label - * fields and aggregate state across separate {@code MetricKey} and {@code AggregateMetric} - * instances, allocating both per unique key on miss; folding them yields one allocation per unique - * key. The class is wider than its predecessors as a result, but that's the trade we explicitly - * chose. - * - *

Required vs optional field absence. Required label fields ({@code resource}, {@code - * service}, {@code operationName}, {@code type}, {@code spanKind}) canonicalize a {@code null} - * snapshot value into {@link UTF8BytesString#EMPTY} via {@link #canonicalize} -- they are never - * {@code null} on a constructed entry. Optional label fields ({@code serviceSource}, {@code - * httpMethod}, {@code httpEndpoint}, {@code grpcStatusCode}) stay {@code null} on the entry when - * the snapshot value was {@code null}; the serializer uses {@code != null} to decide whether to - * emit them on the wire. {@link #contentEquals} treats {@code null} and length-0 as equivalent so - * {@link #matches} works against either form. - * - *

Not thread-safe. Counter and histogram updates are performed by the single aggregator - * thread; producer threads tag durations via {@link #ERROR_TAG} / {@link #TOP_LEVEL_TAG} bits and - * hand them off through the snapshot inbox. - * - *

Single-writer invariant relies on convention. The aggregator thread is the only mutator - * of this class and of {@link AggregateTable}. Nothing enforces this at runtime -- a stray mutation - * from a different thread (e.g. an HTTP-client callback) would corrupt counters or hashtable chains - * silently. The {@code ClearSignal} routing in {@link Aggregator} is the explicit mechanism for - * funneling cross-thread requests (e.g. {@code disable()}) back onto the aggregator thread; any new - * entry point that mutates aggregate state must do the same. + * Aggregator hashtable entry: UTF8 label fields + counter/histogram state; hashing runs after + * canonicalization so overflow values collapse to a shared sentinel bucket rather than fragmenting. + * Not thread-safe — all mutation is on the aggregator thread. Tests must call {@link + * #resetCardinalityHandlers()} in setup to avoid cross-test handler pollution (handlers are + * static); tests using {@link PeerTagSchema} must also call {@link + * PeerTagSchema#resetHandlers(HealthMetrics)} on the schema instance. */ final class AggregateEntry extends Hashtable.Entry { + private static final Logger log = LoggerFactory.getLogger(AggregateEntry.class); + static final long ERROR_TAG = 0x8000000000000000L; static final long TOP_LEVEL_TAG = 0x4000000000000000L; - // UTF8 caches consolidated from the previous MetricKey + ConflatingMetricsAggregator split. - private static final DDCache RESOURCE_CACHE = - DDCaches.newFixedSizeCache(32); - private static final DDCache SERVICE_CACHE = - DDCaches.newFixedSizeCache(32); - private static final DDCache OPERATION_CACHE = - DDCaches.newFixedSizeCache(64); - private static final DDCache SERVICE_SOURCE_CACHE = - DDCaches.newFixedSizeCache(16); - private static final DDCache TYPE_CACHE = DDCaches.newFixedSizeCache(8); - private static final DDCache SPAN_KIND_CACHE = - DDCaches.newFixedSizeCache(16); - private static final DDCache HTTP_METHOD_CACHE = - DDCaches.newFixedSizeCache(8); - private static final DDCache HTTP_ENDPOINT_CACHE = - DDCaches.newFixedSizeCache(32); - private static final DDCache GRPC_STATUS_CODE_CACHE = - DDCaches.newFixedSizeCache(32); - - /** - * Outer cache keyed by peer-tag name, with an inner per-name cache keyed by value. The inner - * cache produces the "name:value" encoded form the serializer writes. - */ - private static final DDCache< - String, Pair, Function>> - PEER_TAGS_CACHE = DDCaches.newFixedSizeCache(64); - - private static final Function< - String, Pair, Function>> - PEER_TAGS_CACHE_ADDER = - key -> - Pair.of( - DDCaches.newFixedSizeCache(512), - value -> UTF8BytesString.create(key + ":" + value)); - - private final UTF8BytesString resource; - private final UTF8BytesString service; - private final UTF8BytesString operationName; - @Nullable private final UTF8BytesString serviceSource; - private final UTF8BytesString type; - private final UTF8BytesString spanKind; - @Nullable private final UTF8BytesString httpMethod; - @Nullable private final UTF8BytesString httpEndpoint; - @Nullable private final UTF8BytesString grpcStatusCode; - private final short httpStatusCode; - - /** Whether the root span carried the {@code synthetics} origin tag (synthetic-monitoring run). */ - private final boolean synthetic; - - /** Whether this span is the trace root ({@code parentId == 0}). */ - private final boolean traceRoot; - - // Peer tags carried in two forms: parallel String[] arrays mirroring the snapshot's (schema + - // values) shape for matches(), and pre-encoded List ("name:value") for the - // serializer. peerTagNames is the schema's names array (shared by-reference when the schema - // hasn't been replaced); peerTagValues is the per-span String[] parallel to it. - // - // Package-private so the in-package test helper (AggregateEntryTestUtils) can compare entries - // by raw layout; production access comes from this class's own matches() + constructor. - @Nullable final String[] peerTagNames; - @Nullable final String[] peerTagValues; - private final List peerTags; - - // Mutable aggregate state -- single-thread (consumer/aggregator) writer. + private static final UTF8BytesString[] EMPTY_TAGS = new UTF8BytesString[0]; + + // Sentinel substitution is disabled until per-component config is wired in a follow-up PR. + // Tests that need sentinel mode should pass useBlockedSentinel=true explicitly. + static final boolean LIMITS_ENABLED = false; + + // Per-field cardinality handlers. Limits live on MetricCardinalityLimits -- see that class for + // per-field rationale. + static final PropertyCardinalityHandler RESOURCE_HANDLER = + new PropertyCardinalityHandler("resource", MetricCardinalityLimits.RESOURCE, LIMITS_ENABLED); + static final PropertyCardinalityHandler SERVICE_HANDLER = + new PropertyCardinalityHandler("service", MetricCardinalityLimits.SERVICE, LIMITS_ENABLED); + static final PropertyCardinalityHandler OPERATION_HANDLER = + new PropertyCardinalityHandler( + "operation", MetricCardinalityLimits.OPERATION, LIMITS_ENABLED); + static final PropertyCardinalityHandler SERVICE_SOURCE_HANDLER = + new PropertyCardinalityHandler( + "service_source", MetricCardinalityLimits.SERVICE_SOURCE, LIMITS_ENABLED); + static final PropertyCardinalityHandler TYPE_HANDLER = + new PropertyCardinalityHandler("type", MetricCardinalityLimits.TYPE, LIMITS_ENABLED); + static final PropertyCardinalityHandler SPAN_KIND_HANDLER = + new PropertyCardinalityHandler( + "span_kind", MetricCardinalityLimits.SPAN_KIND, LIMITS_ENABLED); + static final PropertyCardinalityHandler HTTP_METHOD_HANDLER = + new PropertyCardinalityHandler( + "http_method", MetricCardinalityLimits.HTTP_METHOD, LIMITS_ENABLED); + static final PropertyCardinalityHandler HTTP_ENDPOINT_HANDLER = + new PropertyCardinalityHandler( + "http_endpoint", MetricCardinalityLimits.HTTP_ENDPOINT, LIMITS_ENABLED); + static final PropertyCardinalityHandler GRPC_STATUS_CODE_HANDLER = + new PropertyCardinalityHandler( + "grpc_status_code", MetricCardinalityLimits.GRPC_STATUS_CODE, LIMITS_ENABLED); + + // Single authoritative list used by resetCardinalityHandlers(). populate() and hashOf() keep + // named access for readability and to avoid per-span iteration overhead; this array ensures the + // reset site stays in sync even if a new field is added without updating the loop by name. + private static final PropertyCardinalityHandler[] FIELD_HANDLERS = { + RESOURCE_HANDLER, + SERVICE_HANDLER, + OPERATION_HANDLER, + SERVICE_SOURCE_HANDLER, + TYPE_HANDLER, + SPAN_KIND_HANDLER, + HTTP_METHOD_HANDLER, + HTTP_ENDPOINT_HANDLER, + GRPC_STATUS_CODE_HANDLER, + }; + + final UTF8BytesString resource; + final UTF8BytesString service; + final UTF8BytesString operationName; + // Optional fields use UTF8BytesString.EMPTY as the "absent" sentinel rather than null. The + // cardinality handlers map null inputs to EMPTY, and createUtf8 does the same for the of(...) + // factory, so callers don't need to special-case absence. + final UTF8BytesString serviceSource; + final UTF8BytesString type; + final UTF8BytesString spanKind; + final UTF8BytesString httpMethod; + final UTF8BytesString httpEndpoint; + final UTF8BytesString grpcStatusCode; + final short httpStatusCode; + final boolean synthetic; + final boolean traceRoot; + final List peerTags; + + // Mutable aggregate state -- single-thread (aggregator) writer. private final Histogram okLatencies = Histogram.newHistogram(); /** @@ -141,31 +108,43 @@ final class AggregateEntry extends Hashtable.Entry { private int topLevelCount; private long duration; - /** Hot-path constructor for the producer/consumer flow. Builds UTF8 fields via the caches. */ - AggregateEntry(SpanSnapshot s, long keyHash) { + /** Field-bearing constructor used by both the hot path and the test factory. */ + private AggregateEntry( + long keyHash, + UTF8BytesString resource, + UTF8BytesString service, + UTF8BytesString operationName, + UTF8BytesString serviceSource, + UTF8BytesString type, + UTF8BytesString spanKind, + UTF8BytesString httpMethod, + UTF8BytesString httpEndpoint, + UTF8BytesString grpcStatusCode, + short httpStatusCode, + boolean synthetic, + boolean traceRoot, + List peerTags) { super(keyHash); - this.resource = canonicalize(RESOURCE_CACHE, s.resourceName); - this.service = canonicalize(SERVICE_CACHE, s.serviceName); - this.operationName = canonicalize(OPERATION_CACHE, s.operationName); - this.serviceSource = canonicalizeOptional(SERVICE_SOURCE_CACHE, s.serviceNameSource); - this.type = canonicalize(TYPE_CACHE, s.spanType); - this.spanKind = canonicalize(SPAN_KIND_CACHE, s.spanKind); - this.httpMethod = canonicalizeOptional(HTTP_METHOD_CACHE, s.httpMethod); - this.httpEndpoint = canonicalizeOptional(HTTP_ENDPOINT_CACHE, s.httpEndpoint); - this.grpcStatusCode = canonicalizeOptional(GRPC_STATUS_CODE_CACHE, s.grpcStatusCode); - this.httpStatusCode = s.httpStatusCode; - this.synthetic = s.synthetic; - this.traceRoot = s.traceRoot; - this.peerTagNames = s.peerTagSchema == null ? null : s.peerTagSchema.names; - this.peerTagValues = s.peerTagValues; - this.peerTags = materializePeerTags(this.peerTagNames, this.peerTagValues); + this.resource = resource; + this.service = service; + this.operationName = operationName; + this.serviceSource = serviceSource; + this.type = type; + this.spanKind = spanKind; + this.httpMethod = httpMethod; + this.httpEndpoint = httpEndpoint; + this.grpcStatusCode = grpcStatusCode; + this.httpStatusCode = httpStatusCode; + this.synthetic = synthetic; + this.traceRoot = traceRoot; + this.peerTags = peerTags; } /** * Records a single hit. {@code tagAndDuration} carries the duration nanos with optional {@link * #ERROR_TAG} / {@link #TOP_LEVEL_TAG} bits OR-ed in. */ - void recordOneDuration(long tagAndDuration) { + AggregateEntry recordOneDuration(long tagAndDuration) { ++hitCount; if ((tagAndDuration & TOP_LEVEL_TAG) == TOP_LEVEL_TAG) { tagAndDuration ^= TOP_LEVEL_TAG; @@ -179,6 +158,7 @@ void recordOneDuration(long tagAndDuration) { okLatencies.accept(tagAndDuration); } duration += tagAndDuration; + return this; } int getErrorCount() { @@ -223,10 +203,10 @@ private Histogram errorLatenciesForWrite() { /** * Resets the per-cycle counters and histograms. Label fields ({@code resource}, {@code service}, - * ..., {@code peerTagNames}, {@code peerTagValues}) are deliberately left intact -- they're the - * entry's bucket identity and must persist so a subsequent snapshot with the same key reuses this - * entry instead of allocating a fresh one. Entries that stay at {@code hitCount == 0} across a - * cycle are reaped by {@link AggregateTable#expungeStaleAggregates}. + * ..., {@code peerTags}) are deliberately left intact -- they're the entry's bucket identity and + * must persist so a subsequent snapshot with the same key reuses this entry instead of allocating + * a fresh one. Entries that stay at {@code hitCount == 0} across a cycle are reaped by {@link + * AggregateTable#expungeStaleAggregates}. */ void clear() { this.errorCount = 0; @@ -240,70 +220,137 @@ void clear() { } } - boolean matches(SpanSnapshot s) { - String[] snapshotNames = s.peerTagSchema == null ? null : s.peerTagSchema.names; - return httpStatusCode == s.httpStatusCode - && synthetic == s.synthetic - && traceRoot == s.traceRoot - && contentEquals(resource, s.resourceName) - && contentEquals(service, s.serviceName) - && contentEquals(operationName, s.operationName) - && contentEquals(serviceSource, s.serviceNameSource) - && contentEquals(type, s.spanType) - && contentEquals(spanKind, s.spanKind) - && Arrays.equals(peerTagNames, snapshotNames) - && Arrays.equals(peerTagValues, s.peerTagValues) - && contentEquals(httpMethod, s.httpMethod) - && contentEquals(httpEndpoint, s.httpEndpoint) - && contentEquals(grpcStatusCode, s.grpcStatusCode); + /** + * Test-friendly factory mirroring the prior {@code new MetricKey(...)} positional args. Bypasses + * the cardinality handlers so tests don't pollute their state -- {@link UTF8BytesString}s are + * created directly. Content-equal entries from {@link Canonical#createEntry} still {@link + * #equals} an entry built via {@code of(...)}. + */ + static AggregateEntry of( + CharSequence resource, + CharSequence service, + CharSequence operationName, + CharSequence serviceSource, + CharSequence type, + int httpStatusCode, + boolean synthetic, + boolean traceRoot, + CharSequence spanKind, + List peerTags, + CharSequence httpMethod, + CharSequence httpEndpoint, + CharSequence grpcStatusCode) { + UTF8BytesString resourceUtf = createUtf8(resource); + UTF8BytesString serviceUtf = createUtf8(service); + UTF8BytesString operationNameUtf = createUtf8(operationName); + UTF8BytesString serviceSourceUtf = createUtf8(serviceSource); + UTF8BytesString typeUtf = createUtf8(type); + UTF8BytesString spanKindUtf = createUtf8(spanKind); + UTF8BytesString httpMethodUtf = createUtf8(httpMethod); + UTF8BytesString httpEndpointUtf = createUtf8(httpEndpoint); + UTF8BytesString grpcUtf = createUtf8(grpcStatusCode); + List peerTagsList = peerTags == null ? Collections.emptyList() : peerTags; + UTF8BytesString[] peerTagsArr = peerTagsList.toArray(new UTF8BytesString[0]); + long keyHash = + hashOf( + resourceUtf, + serviceUtf, + operationNameUtf, + serviceSourceUtf, + typeUtf, + spanKindUtf, + httpMethodUtf, + httpEndpointUtf, + grpcUtf, + (short) httpStatusCode, + synthetic, + traceRoot, + peerTagsArr, + peerTagsArr.length); + return new AggregateEntry( + keyHash, + resourceUtf, + serviceUtf, + operationNameUtf, + serviceSourceUtf, + typeUtf, + spanKindUtf, + httpMethodUtf, + httpEndpointUtf, + grpcUtf, + (short) httpStatusCode, + synthetic, + traceRoot, + peerTagsList); } /** - * Pre-checks {@link #keyHash} against {@code keyHash} before delegating to {@link - * #matches(SpanSnapshot)}. The hash check is cheap and rules out most mismatches without touching - * the field-by-field comparison. + * Resets the static per-field cardinality handlers. Does not cover {@link PeerTagSchema}. */ - boolean matches(long keyHash, SpanSnapshot s) { - return this.keyHash == keyHash && matches(s); + static void resetCardinalityHandlers() { + resetCardinalityHandlers(HealthMetrics.NO_OP); + } + + static void resetCardinalityHandlers(HealthMetrics healthMetrics) { + for (PropertyCardinalityHandler handler : FIELD_HANDLERS) { + reportIfBlocked(healthMetrics, handler); + } + PeerTagSchema.INTERNAL.resetHandlers(healthMetrics); + } + + private static void reportIfBlocked( + HealthMetrics healthMetrics, PropertyCardinalityHandler handler) { + long blocked = handler.reset(); + if (blocked > 0) { + log.warn( + "Cardinality limit reached for stats field '{}'; further values will be reported as tracer_blocked_value", + handler.name); + healthMetrics.onTagCardinalityBlocked(handler.statsDTag(), blocked); + } } /** - * Computes the 64-bit lookup hash for a {@link SpanSnapshot}. Chained per-field calls -- no - * varargs / Object[] allocation, no autoboxing on primitive overloads. The constructor's - * super({@code hashOf(s)}) call uses the same function so an entry built from a snapshot hashes - * to the same bucket the snapshot itself looks up. + * 64-bit lookup hash, computed over UTF8-encoded fields so that cardinality-blocked values (which + * all canonicalize to the same sentinel {@link UTF8BytesString}) collide in the same bucket. + * {@link UTF8BytesString#hashCode()} returns the underlying String hash, so entries built via + * {@link #of} produce the same hash as entries built from a snapshot with matching content. * - *

Hashes are content-stable across {@code String} / {@code UTF8BytesString}: {@link - * UTF8BytesString#hashCode()} returns the underlying {@code String}'s hash. + *

Field order intentionally mirrors {@link Canonical#matches} -- UTF8 fields first (highest + * cardinality first for matches' short-circuit benefit), then the peer-tag list, then the + * primitives. The hash itself is order-stable across all callers; the lockstep ordering is purely + * for readability when reasoning about lookup and equality in tandem. */ - static long hashOf(SpanSnapshot s) { + static long hashOf( + UTF8BytesString resource, + UTF8BytesString service, + UTF8BytesString operationName, + UTF8BytesString serviceSource, + UTF8BytesString type, + UTF8BytesString spanKind, + UTF8BytesString httpMethod, + UTF8BytesString httpEndpoint, + UTF8BytesString grpcStatusCode, + short httpStatusCode, + boolean synthetic, + boolean traceRoot, + UTF8BytesString[] peerTags, + int peerTagCount) { long h = 0; - h = LongHashingUtils.addToHash(h, s.resourceName); - h = LongHashingUtils.addToHash(h, s.serviceName); - h = LongHashingUtils.addToHash(h, s.operationName); - h = LongHashingUtils.addToHash(h, s.serviceNameSource); - h = LongHashingUtils.addToHash(h, s.spanType); - h = LongHashingUtils.addToHash(h, s.httpStatusCode); - h = LongHashingUtils.addToHash(h, s.synthetic); - h = LongHashingUtils.addToHash(h, s.traceRoot); - h = LongHashingUtils.addToHash(h, s.spanKind); - // Always mix in both the schema's content hash and the values' content hash, unconditionally - // (no null-skip). Arrays.hashCode is content-based for both String[]s; the default - // Object[].hashCode is identity-based, which would let two snapshots with content-equal but - // distinct PeerTagSchema instances hash to different buckets. Null inputs hash to 0 here, - // distinct from {@code Arrays.hashCode(empty)} = 1 or any non-empty array. - // - // peerTagValues is gated by peerTagSchema: the slot's peerTagValues is a reusable scratch - // buffer that may carry stale contents from a prior tag-firing publish when this publish had - // no peer tags. Hash it only when the schema says it's meaningful, matching the matches() - // contract. - h = LongHashingUtils.addToHash(h, s.peerTagSchema == null ? 0 : s.peerTagSchema.namesHash); - h = - LongHashingUtils.addToHash( - h, s.peerTagSchema == null ? 0 : Arrays.hashCode(s.peerTagValues)); - h = LongHashingUtils.addToHash(h, s.httpMethod); - h = LongHashingUtils.addToHash(h, s.httpEndpoint); - h = LongHashingUtils.addToHash(h, s.grpcStatusCode); + h = LongHashingUtils.addToHash(h, resource); + h = LongHashingUtils.addToHash(h, service); + h = LongHashingUtils.addToHash(h, operationName); + h = LongHashingUtils.addToHash(h, serviceSource); + h = LongHashingUtils.addToHash(h, type); + h = LongHashingUtils.addToHash(h, spanKind); + h = LongHashingUtils.addToHash(h, httpMethod); + h = LongHashingUtils.addToHash(h, httpEndpoint); + h = LongHashingUtils.addToHash(h, grpcStatusCode); + for (int i = 0; i < peerTagCount; i++) { + h = LongHashingUtils.addToHash(h, peerTags[i]); + } + h = LongHashingUtils.addToHash(h, httpStatusCode); + h = LongHashingUtils.addToHash(h, synthetic); + h = LongHashingUtils.addToHash(h, traceRoot); return h; } @@ -320,11 +367,20 @@ UTF8BytesString getOperationName() { return operationName; } - @Nullable UTF8BytesString getServiceSource() { return serviceSource; } + /** + * Whether the snapshot carried a service-source value. Encapsulates the EMPTY-as-absent + * convention: optional fields use {@link UTF8BytesString#EMPTY} as the sentinel for "no value + * captured" (see field comment) -- callers that need a presence check should go through this + * predicate rather than comparing against {@code EMPTY} directly. + */ + boolean hasServiceSource() { + return serviceSource.length() > 0; + } + UTF8BytesString getType() { return type; } @@ -333,21 +389,40 @@ UTF8BytesString getSpanKind() { return spanKind; } - @Nullable UTF8BytesString getHttpMethod() { return httpMethod; } - @Nullable + /** + * Whether the snapshot carried an HTTP method. See {@link #hasServiceSource} for the contract. + */ + boolean hasHttpMethod() { + return httpMethod.length() > 0; + } + UTF8BytesString getHttpEndpoint() { return httpEndpoint; } - @Nullable + /** + * Whether the snapshot carried an HTTP endpoint. See {@link #hasServiceSource} for the contract. + */ + boolean hasHttpEndpoint() { + return httpEndpoint.length() > 0; + } + UTF8BytesString getGrpcStatusCode() { return grpcStatusCode; } + /** + * Whether the snapshot carried a gRPC status code. See {@link #hasServiceSource} for the + * contract. + */ + boolean hasGrpcStatusCode() { + return grpcStatusCode.length() > 0; + } + int getHttpStatusCode() { return httpStatusCode; } @@ -365,101 +440,182 @@ List getPeerTags() { } // Production AggregateEntry intentionally has no equals/hashCode override -- AggregateTable - // bucketing uses keyHash + matches(SpanSnapshot) directly and never invokes Object.equals. - // For tests that need value-equality (Spock argument matchers), use AggregateEntryTestUtils in - // src/test, which provides equals/hashCode helpers without exposing the contract in production. - - // ----- helpers ----- - - private static UTF8BytesString canonicalize( - DDCache cache, CharSequence charSeq) { - if (charSeq == null) { - return EMPTY; - } - if (charSeq instanceof UTF8BytesString) { - return (UTF8BytesString) charSeq; - } - return cache.computeIfAbsent(charSeq.toString(), UTF8BytesString::create); - } + // bucketing uses keyHash + Canonical.matches and never invokes Object.equals. For tests that + // need value-equality (Spock argument matchers), use AggregateEntryTestUtils in src/test. /** - * Like {@link #canonicalize} but returns {@code null} for a {@code null} input (rather than - * {@link UTF8BytesString#EMPTY}). Used for the four optional fields so the serializer can - * distinguish "absent" via a {@code != null} check and elide the field on the wire. + * Reusable scratch buffer for canonicalizing a {@link SpanSnapshot} into UTF8 fields, computing + * its lookup hash, comparing against existing entries, and building a fresh entry on miss. * - *

The {@code instanceof UTF8BytesString} short-circuit is dead code for {@link - * SpanSnapshot#httpMethod}/{@code httpEndpoint}/{@code grpcStatusCode} (statically {@code - * String}) but live for {@link SpanSnapshot#serviceNameSource} ({@link CharSequence}); keeping a - * single helper keeps the constructor consistent. + *

One instance is held by an {@link AggregateTable} and reused on every {@code findOrInsert} + * call. Single-threaded use only. Fields are deliberately mutable -- this is a hot-path scratch + * area, not a value class. */ - @Nullable - private static UTF8BytesString canonicalizeOptional( - DDCache cache, @Nullable CharSequence charSeq) { - if (charSeq == null) { - return null; + static final class Canonical { + UTF8BytesString resource; + UTF8BytesString service; + UTF8BytesString operationName; + UTF8BytesString serviceSource; + UTF8BytesString type; + UTF8BytesString spanKind; + UTF8BytesString httpMethod; + UTF8BytesString httpEndpoint; + UTF8BytesString grpcStatusCode; + short httpStatusCode; + boolean synthetic; + boolean traceRoot; + + /** + * Reusable buffer of canonicalized peer-tag UTF8 forms. Cleared and refilled in {@link + * #populate}; on miss, {@link #createEntry} copies it into an immutable list for the entry to + * own. Zero allocation on the hit path. Sized lazily to the schema's tag count; resized if the + * schema grows. + */ + UTF8BytesString[] peerTagsBuffer = EMPTY_TAGS; + + int peerTagsSize = 0; + + long keyHash; + + /** Canonicalize all fields from {@code s} through the handlers into this buffer. */ + void populate(SpanSnapshot s) { + this.resource = RESOURCE_HANDLER.register(s.resourceName); + this.service = SERVICE_HANDLER.register(s.serviceName); + this.operationName = OPERATION_HANDLER.register(s.operationName); + this.serviceSource = SERVICE_SOURCE_HANDLER.register(s.serviceNameSource); + this.type = TYPE_HANDLER.register(s.spanType); + this.spanKind = SPAN_KIND_HANDLER.register(s.spanKind); + this.httpMethod = HTTP_METHOD_HANDLER.register(s.httpMethod); + this.httpEndpoint = HTTP_ENDPOINT_HANDLER.register(s.httpEndpoint); + this.grpcStatusCode = GRPC_STATUS_CODE_HANDLER.register(s.grpcStatusCode); + this.httpStatusCode = s.httpStatusCode; + this.synthetic = s.synthetic; + this.traceRoot = s.traceRoot; + populatePeerTags(s.peerTagSchema, s.peerTagValues); + this.keyHash = + hashOf( + resource, + service, + operationName, + serviceSource, + type, + spanKind, + httpMethod, + httpEndpoint, + grpcStatusCode, + httpStatusCode, + synthetic, + traceRoot, + peerTagsBuffer, + peerTagsSize); } - if (charSeq instanceof UTF8BytesString) { - return (UTF8BytesString) charSeq; - } - return cache.computeIfAbsent(charSeq.toString(), UTF8BytesString::create); - } - /** - * UTF8 vs raw CharSequence content-equality, no allocation in the common (String) case. - * - *

Treats {@code null} and empty (length 0) as equivalent on either side. This matches the - * canonicalization semantics: {@link #canonicalize} maps a {@code null} input to {@link - * UTF8BytesString#EMPTY}, so an entry built from a snapshot with a null field needs to match a - * subsequent snapshot whose field is still null. {@code intHash(null) == 0 == "".hashCode()}, so - * the hash already agrees with this view. - */ - private static boolean contentEquals(UTF8BytesString a, CharSequence b) { - if (a == null || a.length() == 0) { - return b == null || b.length() == 0; + /** + * Fills {@link #peerTagsBuffer} with canonical UTF8 forms, applying the schema's per-tag + * handler + warn-once notification at the same index. Skips null values rather than round- + * tripping them through the handler (which would return EMPTY and be filtered out anyway). + * Producer-side {@code capturePeerTagValues} produces sparse-null arrays, so the skip pays off + * whenever a span carries only a subset of the configured peer tags. + */ + private void populatePeerTags(PeerTagSchema schema, String[] values) { + peerTagsSize = 0; + if (schema == null || values == null) { + return; + } + int n = Math.min(schema.size(), values.length); + if (peerTagsBuffer.length < n) { + peerTagsBuffer = new UTF8BytesString[n]; + } + for (int i = 0; i < n; i++) { + String value = values[i]; + if (value == null) { + continue; + } + peerTagsBuffer[peerTagsSize++] = schema.register(i, value); + } } - // UTF8BytesString.toString() returns the underlying String -- O(1), no allocation. - return b != null && a.toString().contentEquals(b); - } - /** - * Encodes the per-span peer-tag values into the {@code List} the serializer - * consumes. Reads name/value pairs at the same index from the schema's names and the snapshot's - * values; null value slots are skipped (the span didn't set that peer tag). Counts hits once for - * exact-size allocation and preserves the singletonList fast path for the common one-entry case - * (e.g. internal-kind base.service). - */ - private static List materializePeerTags( - @Nullable String[] names, @Nullable String[] values) { - if (names == null || values == null) { - return Collections.emptyList(); + /** + * Whether this canonicalized snapshot matches the given entry. Compares UTF8 fields via + * content-equality (so an entry surviving a handler reset still matches a freshly-canonicalized + * snapshot of the same content). + * + *

Field order is cardinality-tuned: resource / service / operationName first because they + * vary most across collisions, then the remaining UTF8 fields, then the peer-tag list + * comparison (slowest), then the primitives. All UTF8 fields are non-null by the EMPTY- + * sentinel invariant (see field comments above), so direct {@code a.equals(b)} is safe. + */ + boolean matches(AggregateEntry e) { + return resource.equals(e.resource) + && service.equals(e.service) + && operationName.equals(e.operationName) + && serviceSource.equals(e.serviceSource) + && type.equals(e.type) + && spanKind.equals(e.spanKind) + && httpMethod.equals(e.httpMethod) + && httpEndpoint.equals(e.httpEndpoint) + && grpcStatusCode.equals(e.grpcStatusCode) + && peerTagsEqual(peerTagsBuffer, peerTagsSize, e.peerTags) + && httpStatusCode == e.httpStatusCode + && synthetic == e.synthetic + && traceRoot == e.traceRoot; } - int n = names.length; - int firstHit = -1; - int hitCount = 0; - for (int i = 0; i < n; i++) { - if (values[i] != null) { - if (hitCount == 0) firstHit = i; - hitCount++; + + private static boolean peerTagsEqual(UTF8BytesString[] a, int aSize, List b) { + if (aSize != b.size()) { + return false; } + for (int i = 0; i < aSize; i++) { + if (!a[i].equals(b.get(i))) { + return false; + } + } + return true; } - if (hitCount == 0) { - return Collections.emptyList(); - } - if (hitCount == 1) { - return Collections.singletonList(encodePeerTag(names[firstHit], values[firstHit])); - } - List tags = new ArrayList<>(hitCount); - for (int i = firstHit; i < n; i++) { - if (values[i] != null) { - tags.add(encodePeerTag(names[i], values[i])); + + /** + * Build a new entry from the currently-populated canonical fields. The peer-tag buffer is + * copied into an immutable list so the entry's reference stays stable across subsequent {@link + * #populate} calls. + */ + AggregateEntry createEntry() { + List snapshottedPeerTags; + int n = peerTagsSize; + if (n == 0) { + snapshottedPeerTags = Collections.emptyList(); + } else if (n == 1) { + snapshottedPeerTags = Collections.singletonList(peerTagsBuffer[0]); + } else { + snapshottedPeerTags = Arrays.asList(Arrays.copyOf(peerTagsBuffer, n)); } + return new AggregateEntry( + keyHash, + resource, + service, + operationName, + serviceSource, + type, + spanKind, + httpMethod, + httpEndpoint, + grpcStatusCode, + httpStatusCode, + synthetic, + traceRoot, + snapshottedPeerTags); } - return tags; } - private static UTF8BytesString encodePeerTag(String name, String value) { - final Pair, Function> - cacheAndCreator = PEER_TAGS_CACHE.computeIfAbsent(name, PEER_TAGS_CACHE_ADDER); - return cacheAndCreator.getLeft().computeIfAbsent(value, cacheAndCreator.getRight()); + // ----- helpers ----- + + /** Direct {@link UTF8BytesString} creation that bypasses the cardinality handlers. */ + private static UTF8BytesString createUtf8(CharSequence cs) { + if (cs == null) { + return UTF8BytesString.EMPTY; + } + if (cs instanceof UTF8BytesString) { + return (UTF8BytesString) cs; + } + return UTF8BytesString.create(cs.toString()); } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java index abadc7e5f17..71e96022ba7 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java @@ -7,14 +7,14 @@ import java.util.function.Consumer; /** - * Consumer-side {@link AggregateEntry} store, keyed on the raw fields of a {@link SpanSnapshot}. + * Consumer-side {@link AggregateEntry} store, keyed on the canonical UTF8-encoded labels of a + * {@link SpanSnapshot}. * - *

Replaces the prior {@code LRUCache}. The win is on the - * steady-state hit path: a snapshot lookup is a 64-bit hash compute + bucket walk + field-wise - * {@code matches}, with no per-snapshot {@link AggregateEntry} allocation and no UTF8 cache - * lookups. The UTF8-encoded forms (formerly held on {@code MetricKey}) and the mutable counters - * (formerly held on {@code AggregateMetric}) both live on the {@link AggregateEntry} now, built - * once per unique key at insert time. + *

{@link #findOrInsert} canonicalizes the snapshot's fields through the cardinality handlers (so + * cardinality-blocked values share a sentinel and collapse into one entry) and then computes the + * lookup hash from that canonical form. Canonicalization runs into a reusable {@link + * AggregateEntry.Canonical} scratch buffer; on a hit nothing is allocated, on a miss the buffer's + * references are copied into a fresh entry and the buffer is overwritten on the next call. * *

Not thread-safe. The aggregator thread is the sole writer of both this table and its * contained {@link AggregateEntry} state. Any cross-thread request that needs to mutate -- e.g. @@ -27,6 +27,7 @@ final class AggregateTable { private final Hashtable.Entry[] buckets; private final int maxAggregates; + private final AggregateEntry.Canonical canonical = new AggregateEntry.Canonical(); private int size; /** @@ -55,18 +56,19 @@ boolean isEmpty() { * caller should drop the data point in that case. */ AggregateEntry findOrInsert(SpanSnapshot snapshot) { - long keyHash = AggregateEntry.hashOf(snapshot); + canonical.populate(snapshot); + long keyHash = canonical.keyHash; for (AggregateEntry candidate = Support.bucket(buckets, keyHash); candidate != null; candidate = candidate.next()) { - if (candidate.matches(keyHash, snapshot)) { + if (candidate.keyHash == keyHash && canonical.matches(candidate)) { return candidate; } } if (size >= maxAggregates && !evictOneStale()) { return null; } - AggregateEntry entry = new AggregateEntry(snapshot, keyHash); + AggregateEntry entry = canonical.createEntry(); Support.insertHeadEntry(buckets, keyHash, entry); size++; return entry; @@ -80,8 +82,13 @@ AggregateEntry findOrInsert(SpanSnapshot snapshot) { * *

The semantic intent: at cap with all entries live, drop the new key (reported via {@code * onStatsAggregateDropped}) rather than evicting an established one. Cap is sized to the - * steady-state working set, so eviction is rare; this cursor optimization handles the - * pathological "persistently at cap" case. + * steady-state working set, so eviction is rare in the common case. + * + *

How often this fires depends on {@link AggregateEntry#LIMITS_ENABLED}. With limits enabled, + * over-cap values for a given field collapse into a shared {@code blocked_by_tracer} bucket, so + * the table itself rarely reaches {@code maxAggregates}. With limits disabled (the default), + * over-cap values flow to distinct buckets and {@code maxAggregates} becomes the load-bearing + * backstop -- the cursor-resumed scan was added specifically for this regime. */ private boolean evictOneStale() { // Two passes -- [cursor, length) then [0, cursor) -- using the half-open-range iterator. The diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java index d809d452522..8a33d3f1ea7 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java @@ -30,10 +30,10 @@ final class Aggregator implements Runnable { /** * Per-cycle hook run on the aggregator thread at the start of each report cycle, before the - * flush. Used by {@link ConflatingMetricsAggregator} to reconcile its cached peer-tag schema - * against {@link datadog.communication.ddagent.DDAgentFeaturesDiscovery}; running before the - * flush guarantees that any test awaiting {@code writer.finishBucket()} observes the schema in - * its post-reconcile state. May be {@code null}. + * flush. Used by {@link ClientStatsAggregator} to reconcile its cached peer-tag schema against + * {@link datadog.communication.ddagent.DDAgentFeaturesDiscovery}; running before the flush + * guarantees that any test awaiting {@code writer.finishBucket()} observes the schema in its + * post-reconcile state. May be {@code null}. */ private final Runnable onReportCycle; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ClientStatsAggregator.java similarity index 76% rename from dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java rename to dd-trace-core/src/main/java/datadog/trace/common/metrics/ClientStatsAggregator.java index 895ee434854..e710f36f23e 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ClientStatsAggregator.java @@ -4,7 +4,6 @@ import static datadog.trace.api.DDSpanTypes.RPC; import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_ENDPOINT; import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_METHOD; -import static datadog.trace.bootstrap.instrumentation.api.Tags.SPAN_KIND; import static datadog.trace.common.metrics.AggregateEntry.ERROR_TAG; import static datadog.trace.common.metrics.AggregateEntry.TOP_LEVEL_TAG; import static datadog.trace.common.metrics.SignalItem.ClearSignal.CLEAR; @@ -40,14 +39,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public final class ConflatingMetricsAggregator implements MetricsAggregator, EventListener { +public final class ClientStatsAggregator implements MetricsAggregator, EventListener { - private static final Logger log = LoggerFactory.getLogger(ConflatingMetricsAggregator.class); + private static final Logger log = LoggerFactory.getLogger(ClientStatsAggregator.class); private static final Map DEFAULT_HEADERS = Collections.singletonMap(DDAgentApi.DATADOG_META_TRACER_VERSION, DDTraceCoreInfo.VERSION); - private static final CharSequence SYNTHETICS_ORIGIN = "synthetics"; + private static final String SYNTHETICS_ORIGIN = "synthetics"; private static final SpanKindFilter METRICS_ELIGIBLE_KINDS = SpanKindFilter.builder() @@ -78,22 +77,23 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve * Cached peer-aggregation schema. Producers read this reference once per trace and pass it * through to the consumer in {@link SpanSnapshot}; they never inspect the schema's discovery * state or rebuild it. Reconciliation is the aggregator thread's job: {@link - * #reconcilePeerTagSchema()} compares the schema's {@link PeerTagSchema#state} against {@link - * DDAgentFeaturesDiscovery#state()} once per reporting cycle and either updates the state in - * place (when the tag set is unchanged) or swaps in a freshly-built schema. + * #reconcilePeerTagSchema()} (called from {@link #resetCardinalityHandlers()}) compares the + * schema's {@link PeerTagSchema#state} against {@link DDAgentFeaturesDiscovery#state()} once per + * reporting cycle and either updates the state in place (when the tag set is unchanged, + * preserving the schema's warm cardinality handlers) or swaps in a freshly-built schema. * - *

{@code null} only on the bootstrap window before {@link #bootstrapPeerTagSchema()} runs on - * the first publish. + *

An empty schema (size 0) represents the "peer tags unconfigured" state; {@code null} only on + * the bootstrap window before {@link #bootstrapPeerTagSchema()} runs on the first publish. * *

{@code volatile} so the consumer's reconcile-time replacement is visible to producer - * threads; the schema's own internal mutable state ({@link PeerTagSchema#state}) is exercised - * only on the aggregator thread. + * threads; the schema's own internal mutable state (handlers, block counters, {@link + * PeerTagSchema#state}) is exercised only on the aggregator thread. */ private volatile PeerTagSchema cachedPeerTagSchema; private volatile AgentTaskScheduler.Scheduled cancellation; - public ConflatingMetricsAggregator( + public ClientStatsAggregator( Config config, SharedCommunicationObjects sharedCommunicationObjects, HealthMetrics healthMetrics) { @@ -114,7 +114,7 @@ public ConflatingMetricsAggregator( config.isTraceResourceRenamingEnabled()); } - ConflatingMetricsAggregator( + ClientStatsAggregator( WellKnownTags wellKnownTags, Set ignoredResources, DDAgentFeaturesDiscovery features, @@ -136,7 +136,7 @@ public ConflatingMetricsAggregator( includeEndpointInMetrics); } - ConflatingMetricsAggregator( + ClientStatsAggregator( WellKnownTags wellKnownTags, Set ignoredResources, DDAgentFeaturesDiscovery features, @@ -160,7 +160,7 @@ public ConflatingMetricsAggregator( includeEndpointInMetrics); } - ConflatingMetricsAggregator( + ClientStatsAggregator( Set ignoredResources, DDAgentFeaturesDiscovery features, HealthMetrics healthMetric, @@ -185,7 +185,7 @@ public ConflatingMetricsAggregator( reportingInterval, timeUnit, healthMetric, - this::reconcilePeerTagSchema); + this::resetCardinalityHandlers); this.thread = newAgentThread(METRICS_AGGREGATOR, aggregator); this.reportingInterval = reportingInterval; this.reportingIntervalTimeUnit = timeUnit; @@ -267,17 +267,26 @@ public boolean publish(List> trace) { boolean forceKeep = false; int counted = 0; if (features.supportsMetrics()) { + // Producer-side fast path: one volatile read and use whatever schema is currently cached. + // The aggregator thread keeps this schema in sync with feature discovery in + // resetCardinalityHandlers(). The only producer-side rebuild is the one-time bootstrap on + // the first publish. + PeerTagSchema peerTagSchema = cachedPeerTagSchema; + if (peerTagSchema == null) { + peerTagSchema = bootstrapPeerTagSchema(); + } for (CoreSpan span : trace) { boolean isTopLevel = span.isTopLevel(); if (shouldComputeMetric(span, isTopLevel)) { final CharSequence resourceName = span.getResourceName(); - if (resourceName != null && ignoredResources.contains(resourceName.toString())) { + if (!ignoredResources.isEmpty() + && resourceName != null + && ignoredResources.contains(resourceName.toString())) { // skip publishing all children - forceKeep = false; break; } counted++; - forceKeep |= publish(span, isTopLevel); + forceKeep |= publish(span, isTopLevel, peerTagSchema); } } healthMetrics.onClientStatTraceComputed(counted, trace.size(), !forceKeep); @@ -292,20 +301,13 @@ private boolean shouldComputeMetric(CoreSpan span, boolean isTopLevel) { && span.getDurationNano() > 0; } - private boolean publish(CoreSpan span, boolean isTopLevel) { - // Error decision drives force-keep sampling regardless of whether the snapshot gets queued. + private boolean publish(CoreSpan span, boolean isTopLevel, PeerTagSchema peerTagSchema) { boolean error = span.getError() > 0; - - // Fast-path the inbox-full case before any tag extraction or snapshot allocation. size() is - // approximate on jctools' MPSC queue but that's fine: if we under-estimate, we fall through - // and let inbox.offer be the source of truth (existing behavior); if we over-estimate, we - // drop a snapshot that would have fit -- acceptable, onStatsInboxFull was going to fire - // imminently anyway. + // size() is approximate on jctools MPSC queues but good enough for a fast-path overflow check. if (inbox.size() >= inbox.capacity()) { healthMetrics.onStatsInboxFull(); return error; } - // Extract HTTP method and endpoint only if the feature is enabled String httpMethod = null; String httpEndpoint = null; @@ -322,20 +324,23 @@ private boolean publish(CoreSpan span, boolean isTopLevel) { Object grpcStatusObj = span.unsafeGetTag(InstrumentationTags.GRPC_STATUS_CODE); grpcStatusCode = grpcStatusObj != null ? grpcStatusObj.toString() : null; } - // CharSequence default keeps unsafeGetTag's generic at CharSequence so UTF8BytesString - // tag values don't trigger a ClassCastException on the String assignment. - final String spanKind = span.unsafeGetTag(SPAN_KIND, (CharSequence) "").toString(); + // DDSpan resolves this from a cached span.kind ordinal via a small lookup array, skipping a + // tag-map lookup. Other CoreSpan impls fall back to the tag map by default. + String spanKind = span.getSpanKindString(); + if (spanKind == null) { + spanKind = ""; + } long tagAndDuration = span.getDurationNano() | (error ? ERROR_TAG : 0L) | (isTopLevel ? TOP_LEVEL_TAG : 0L); - PeerTagSchema peerTagSchema = peerTagSchemaFor(span); + PeerTagSchema spanPeerTagSchema = peerTagSchemaFor(spanKind, peerTagSchema); String[] peerTagValues = - peerTagSchema == null ? null : capturePeerTagValues(span, peerTagSchema); + spanPeerTagSchema == null ? null : capturePeerTagValues(span, spanPeerTagSchema); if (peerTagValues == null) { - // No tags fired -- drop the schema reference so the consumer doesn't bother iterating an - // all-null array. - peerTagSchema = null; + // capture returned no non-null values -- drop the schema reference so the consumer doesn't + // bother iterating an all-null array. + spanPeerTagSchema = null; } SpanSnapshot snapshot = @@ -349,7 +354,7 @@ private boolean publish(CoreSpan span, boolean isTopLevel) { isSynthetic(span), span.getParentId() == 0, spanKind, - peerTagSchema, + spanPeerTagSchema, peerTagValues, httpMethod, httpEndpoint, @@ -362,26 +367,6 @@ private boolean publish(CoreSpan span, boolean isTopLevel) { return error; } - /** - * Picks the peer-tag schema for a span. For internal-kind spans we always use the static {@link - * PeerTagSchema#INTERNAL} singleton (one entry for {@code base.service}); for {@code - * client}/{@code producer}/{@code consumer} kinds we use the cached peer-aggregation schema - * synced from {@link DDAgentFeaturesDiscovery#peerTags()}. Other kinds get {@code null}. - */ - private PeerTagSchema peerTagSchemaFor(CoreSpan span) { - if (span.isKind(PEER_AGGREGATION_KINDS)) { - PeerTagSchema schema = cachedPeerTagSchema; - if (schema == null) { - schema = bootstrapPeerTagSchema(); - } - return schema.size() > 0 ? schema : null; - } - if (span.isKind(INTERNAL_KIND)) { - return PeerTagSchema.INTERNAL; - } - return null; - } - /** * One-time producer-side bootstrap of {@link #cachedPeerTagSchema}. Synchronized double-check * guards against two producers racing on the very first publish; after this returns, {@code @@ -416,14 +401,32 @@ private PeerTagSchema buildPeerTagSchema() { return PeerTagSchema.of(names == null ? Collections.emptySet() : names, state); } + /** + * Single reset hook invoked on the aggregator thread at the end of each report cycle. Reconciles + * the cached peer-tag schema against the latest feature discovery, then resets all cardinality + * state in lockstep: the static property handlers + {@code PeerTagSchema.INTERNAL} (via {@link + * AggregateEntry#resetCardinalityHandlers()}) and the cached peer-tag schema (with whatever + * reconciliation just produced). New handlers added anywhere in this pipeline should be reset + * from here. + */ + private void resetCardinalityHandlers() { + reconcilePeerTagSchema(); + AggregateEntry.resetCardinalityHandlers(healthMetrics); + PeerTagSchema schema = cachedPeerTagSchema; + if (schema != null) { + schema.resetHandlers(healthMetrics); + } + } + /** * Reconciles {@link #cachedPeerTagSchema} with the latest feature discovery. Runs on the * aggregator thread once per reporting cycle via the reset hook passed to {@link Aggregator}. * Cheap fast path: an equality check against the cached schema's embedded {@link * DDAgentFeaturesDiscovery#state()} hash short-circuits when discovery's response hasn't changed * since the schema was built. On mismatch, a set compare distinguishes "discovery response - * changed but peer tags are the same" (just update the cached state in place) from "tags actually - * changed" (build a new schema and swap the volatile reference). + * changed but peer tags are the same" (just update the cached state in place to preserve the warm + * cardinality handlers) from "tags actually changed" (build a new schema and swap the volatile + * reference). */ private void reconcilePeerTagSchema() { PeerTagSchema cached = cachedPeerTagSchema; @@ -436,19 +439,37 @@ private void reconcilePeerTagSchema() { return; } Set latestNames = features.peerTags(); - Set normalized = latestNames == null ? Collections.emptySet() : latestNames; + Set normalized = latestNames == null ? Collections.emptySet() : latestNames; if (cached.hasSameTagsAs(normalized)) { cached.state = latestState; } else { + // Tags actually changed: flush the outgoing schema's accumulated block telemetry before + // discarding it, otherwise the partial-cycle blockedCounts would silently disappear. + cached.resetHandlers(healthMetrics); cachedPeerTagSchema = PeerTagSchema.of(normalized, latestState); } } /** - * Captures the span's peer-tag values into a {@code String[]} parallel to {@code schema.names}. - * Slots remain {@code null} for tags the span didn't set; the array itself is lazily allocated on - * the first hit so spans that fire no peer tags pay zero allocation. Returns {@code null} when - * none of the configured peer tags are set on the span. + * Picks the peer-tag schema for a span. The {@code peerTagSchema} argument is the per-trace + * cached schema (read once in {@link #publish(List)} via the volatile {@link + * #cachedPeerTagSchema}, with {@link #bootstrapPeerTagSchema()} taking care of the first-publish + * window) -- always non-null but possibly empty when peer tags are unconfigured. For + * internal-kind spans the static {@link PeerTagSchema#INTERNAL} schema is used regardless. + */ + private static PeerTagSchema peerTagSchemaFor(String spanKind, PeerTagSchema peerTagSchema) { + if (peerTagSchema.size() > 0 && PEER_AGGREGATION_KINDS.matches(spanKind)) { + return peerTagSchema; + } + if (INTERNAL_KIND.matches(spanKind)) { + return PeerTagSchema.INTERNAL; + } + return null; + } + + /** + * Captures the span's peer tag values into a {@code String[]} parallel to {@code schema.names}. + * Returns {@code null} when none of the configured peer tags are set on the span. */ private static String[] capturePeerTagValues(CoreSpan span, PeerTagSchema schema) { String[] names = schema.names; @@ -467,7 +488,8 @@ private static String[] capturePeerTagValues(CoreSpan span, PeerTagSchema sch } private static boolean isSynthetic(CoreSpan span) { - return span.getOrigin() != null && SYNTHETICS_ORIGIN.equals(span.getOrigin().toString()); + CharSequence origin = span.getOrigin(); + return origin != null && SYNTHETICS_ORIGIN.contentEquals(origin); } public void stop() { @@ -526,11 +548,10 @@ private void disable() { } } - private static final class ReportTask - implements AgentTaskScheduler.Task { + private static final class ReportTask implements AgentTaskScheduler.Task { @Override - public void run(ConflatingMetricsAggregator target) { + public void run(ClientStatsAggregator target) { target.report(); } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricCardinalityLimits.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricCardinalityLimits.java new file mode 100644 index 00000000000..3e1e696eb89 --- /dev/null +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricCardinalityLimits.java @@ -0,0 +1,73 @@ +package datadog.trace.common.metrics; + +/** + * Per-field caps on the number of distinct values canonicalized per reporting cycle. Overflow + * values collapse to a {@code tracer_blocked_value} sentinel so they merge into one aggregate row + * instead of fragmenting the table. + * + *

Values are sized to the typical-service workload with headroom; "typical" estimates are noted + * inline. Raise if a workload routinely hits the sentinel; lower carries proportional memory + * savings but risks suppressing legitimate distinctions. + */ +final class MetricCardinalityLimits { + private MetricCardinalityLimits() {} + + /** + * Distinct {@code resource.name} values per cycle. Highest-cardinality field by far: DB-query + * obfuscations, HTTP route templates, custom resources. Typical service: 30-200 unique. + */ + static final int RESOURCE = 256; + + /** + * Distinct {@code service.name} values per cycle. Local service plus downstream peer-service + * names. Microservice meshes typically reference 10-50 distinct services. + */ + static final int SERVICE = 32; + + /** + * Distinct {@code operation.name} values per cycle. Names like {@code http.request}, {@code + * db.query}, etc. Typical service: 10-30 across integrations. + */ + static final int OPERATION = 64; + + /** + * Distinct {@code _dd.base_service} override values per cycle. Used rarely; usually empty or one + * of a handful per service. + */ + static final int SERVICE_SOURCE = 16; + + /** + * Distinct {@code span.type} values per cycle. {@code DDSpanTypes} catalog is ~30; a single + * service usually spans 5-10 integration types. + */ + static final int TYPE = 16; + + /** + * Distinct {@code span.kind} values per cycle. OTel defines exactly 5 (server/client/producer/ + * consumer/internal); 8 still leaves 60% headroom in case a producer invents new kinds. + */ + static final int SPAN_KIND = 8; + + /** + * Distinct HTTP method values per cycle. Standard verbs are 7-9; WebDAV/custom adds a few more. + */ + static final int HTTP_METHOD = 16; + + /** + * Distinct {@code http.endpoint} values per cycle. Path templates -- same shape as {@code + * RESOURCE} for HTTP-heavy services. Only used when {@code includeEndpointInMetrics} is enabled. + */ + static final int HTTP_ENDPOINT = 64; + + /** + * Distinct gRPC status code values per cycle. gRPC spec defines exactly 17 codes (0-16); 24 + * leaves headroom for unknown-code edge cases without wasting space. + */ + static final int GRPC_STATUS_CODE = 24; + + /** + * Distinct values per peer-tag name (e.g. distinct {@code peer.hostname} values). Each configured + * peer tag gets its own handler at this limit. + */ + static final int PEER_TAG_VALUE = 512; +} diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricsAggregatorFactory.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricsAggregatorFactory.java index 09464310113..b9530871763 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricsAggregatorFactory.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricsAggregatorFactory.java @@ -15,7 +15,7 @@ public static MetricsAggregator createMetricsAggregator( HealthMetrics healthMetrics) { if (config.isTracerMetricsEnabled()) { log.debug("tracer metrics enabled"); - return new ConflatingMetricsAggregator(config, sharedCommunicationObjects, healthMetrics); + return new ClientStatsAggregator(config, sharedCommunicationObjects, healthMetrics); } log.debug("tracer metrics disabled"); return NoOpMetricsAggregator.INSTANCE; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java index d3a3d47d65a..a4182386559 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java @@ -3,17 +3,19 @@ import static datadog.trace.api.DDTags.BASE_SERVICE; import datadog.communication.ddagent.DDAgentFeaturesDiscovery; +import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; +import datadog.trace.core.monitor.HealthMetrics; import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * Names of the peer-tags eligible for client-stats aggregation, packed into a flat {@code String[]} - * for parallel-array access by producers and the aggregator thread. + * Parallel arrays of peer-tag names and their {@link TagCardinalityHandler}s, indexed in lockstep. * - *

This is the minimal carrier shape used by {@link SpanSnapshot}: the producer captures per-span - * values into a {@code String[]} parallel to {@link #names}, and the aggregator reconstructs the - * encoded {@code tag:value} pairs from the same name index. It replaces the prior "flat pairs" - * {@code [name0, value0, name1, value1, ...]} layout, which forced a worst-case allocation + - * trim-and-copy on every span. + *

Replaces the previous {@code Map} lookup with positional array + * access: the producer captures span tag values into a {@code String[]} parallel to {@link #names}, + * and the consumer calls {@link #register(int, String)} at the same index to canonicalize the value + * through the per-tag cardinality handler. * *

Two schemas exist: * @@ -21,35 +23,39 @@ *

  • {@link #INTERNAL} -- a singleton with one entry for {@code base.service}, used for * internal-kind spans where only the base service is aggregated. *
  • A peer-aggregation schema built via {@link #of(Set, String)} for {@code client}/{@code - * producer}/{@code consumer} spans. {@link ConflatingMetricsAggregator} caches the most - * recently built schema and reconciles it on the aggregator thread once per reporting cycle - * by comparing {@link #state} against {@link DDAgentFeaturesDiscovery#state()}. + * producer}/{@code consumer} spans. {@link ClientStatsAggregator} caches the most recently + * built schema and reconciles it on the aggregator thread once per reporting cycle by + * comparing {@link #state} against {@link DDAgentFeaturesDiscovery#state()}. * * - *

    This class deliberately has no cardinality limiters -- callers that need those layer them on - * top. + *

    Cardinality blocks are counted inside each {@link TagCardinalityHandler} and flushed once per + * cycle (with a warn log) in {@link #resetHandlers(HealthMetrics)}. * - *

    Thread-safety: {@link #names} is final and safe to read from any thread. {@link #state} - * is exercised only on the aggregator thread (read and updated in reconciliation); producer threads - * access the schema only through the volatile {@code cachedPeerTagSchema} reference in {@link - * ConflatingMetricsAggregator}. + *

    Each {@link SpanSnapshot} captures its own schema reference so producer and consumer agree on + * the indexing even if the current schema is replaced between capture and consumption. + * + *

    Thread-safety: all mutable state ({@link TagCardinalityHandler}s and {@link #state}) is + * exercised only on the aggregator thread. {@link #names} and {@link #handlers} are final and safe + * to read from any thread; producer threads access them through the volatile {@code + * cachedPeerTagSchema} reference in {@link ClientStatsAggregator}. */ final class PeerTagSchema { - /** Singleton schema for internal-kind spans -- only {@code base.service}. */ - static final PeerTagSchema INTERNAL = - // INTERNAL is never reconciled, so the state value is irrelevant. - new PeerTagSchema(new String[] {BASE_SERVICE}, null); - - final String[] names; + private static final Logger log = LoggerFactory.getLogger(PeerTagSchema.class); /** - * Precomputed {@code Arrays.hashCode(names)}. The schema is shared across many publishes so - * recomputing it on the aggregator hot path (per-publish call to {@code AggregateEntry.hashOf}) - * was waste -- it showed up as a top aggregator-thread sample. Cached here, computed once at - * construction. + * Sentinel {@link #state} for schemas that are never reconciled against feature discovery: the + * {@link #INTERNAL} singleton and test-built schemas. A {@code null} state always mismatches a + * real discovery hash, so a schema built with it would rebuild on first reconcile -- but neither + * of these schemas takes that path. */ - final int namesHash; + static final String NO_STATE = null; + + /** Singleton schema for internal-kind spans -- only {@code base.service}. */ + static final PeerTagSchema INTERNAL = new PeerTagSchema(new String[] {BASE_SERVICE}, NO_STATE); + + final String[] names; + final TagCardinalityHandler[] handlers; /** * The {@code DDAgentFeaturesDiscovery.state()} hash this schema was built from. The aggregator @@ -60,30 +66,32 @@ final class PeerTagSchema { */ String state; - private PeerTagSchema(String[] names, String state) { - this.names = names; - this.namesHash = java.util.Arrays.hashCode(names); - this.state = state; - } - /** Builds a schema for the given peer-tag names. Order is determined by the {@link Set}. */ - static PeerTagSchema of(Set tags, String state) { - return new PeerTagSchema(tags.toArray(new String[0]), state); + static PeerTagSchema of(Set names, String state) { + return new PeerTagSchema(names.toArray(new String[0]), state); } - /** - * Test-only factory that takes the names array directly so tests can build a schema in a specific - * order without going through a {@link Set}. - */ + /** Test-only factory: takes names array directly to build a schema in a specific order. */ static PeerTagSchema testSchema(String[] names) { - return new PeerTagSchema(names, null); + return new PeerTagSchema(names, NO_STATE); + } + + private PeerTagSchema(String[] names, String state) { + this.names = names; + this.state = state; + this.handlers = new TagCardinalityHandler[names.length]; + for (int i = 0; i < names.length; i++) { + this.handlers[i] = + new TagCardinalityHandler( + names[i], MetricCardinalityLimits.PEER_TAG_VALUE, AggregateEntry.LIMITS_ENABLED); + } } /** * Whether this schema's tag names exactly match {@code other}. Used by the aggregator's reconcile * path: when a feature discovery refresh changes {@link DDAgentFeaturesDiscovery#state()} but the - * resulting set is unchanged, the aggregator can keep this schema and just update {@link #state} - * instead of rebuilding. + * resulting set is unchanged, the aggregator can keep this schema (and its warm cardinality + * handlers) and just update {@link #state} instead of rebuilding. */ boolean hasSameTagsAs(Set other) { if (this.names.length != other.size()) { @@ -97,7 +105,38 @@ boolean hasSameTagsAs(Set other) { return true; } + /** + * Canonicalizes the peer-tag value at slot {@code i}. Returns {@link UTF8BytesString#EMPTY} for + * null inputs and the handler's {@code ":tracer_blocked_value"} sentinel when the per-tag + * cardinality budget is exhausted. + */ + UTF8BytesString register(int i, String value) { + return handlers[i].register(value); + } + + /** + * Resets every {@link TagCardinalityHandler}'s working set, flushes accumulated per-tag block + * counts to {@link HealthMetrics}, and emits a warn log for each tag that hit its limit this + * cycle. Must be called on the aggregator thread; handlers are not thread-safe. + */ + void resetHandlers(HealthMetrics healthMetrics) { + for (int i = 0; i < handlers.length; i++) { + long blocked = handlers[i].reset(); + if (blocked > 0) { + log.warn( + "Cardinality limit reached for peer tag '{}'; further values are reported as" + + " 'tracer_blocked_value' until the next reporting cycle", + names[i]); + healthMetrics.onTagCardinalityBlocked(handlers[i].statsDTag(), blocked); + } + } + } + int size() { return names.length; } + + String name(int i) { + return names[i]; + } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/PropertyCardinalityHandler.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PropertyCardinalityHandler.java new file mode 100644 index 00000000000..cdb7aa91fd5 --- /dev/null +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PropertyCardinalityHandler.java @@ -0,0 +1,187 @@ +package datadog.trace.common.metrics; + +import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; +import java.util.Arrays; + +/** + * Cardinality-capped UTF8 canonicalizer for one property field. + * + *

    Dual role -- limiter and cache. Prior versions ran a per-field {@code DDCache} for UTF8 + * reuse with a separate global cardinality cap on top. Under high load that wasn't enough to stave + * off long GC cycles: every miss still concatenated / UTF8-encoded the value before the cache could + * store it. A cardinality limiter and a recent-value cache are both sets of recently used + * values, so this class collapses them into one structure. Cardinality limiting happens first, + * which lets the blocked path skip the concatenation and encoding entirely. + * + *

    A pure limiter would fully reset each reporting cycle and destroy the cache. To preserve UTF8 + * reuse across resets, the handler keeps the previous cycle's entries verbatim in a parallel table + * and reuses any matching {@link UTF8BytesString} when a value first appears in the new cycle. + * + *

    Accepts any {@link CharSequence} input -- mixed {@code String}/{@code UTF8BytesString} of the + * same content collapse to one slot because {@link UTF8BytesString#hashCode()} delegates to the + * underlying String's hash and probe equality is the content-based {@code + * stored.toString().contentEquals(value)} (which fast-paths to {@code String.equals} when the input + * is a String). + * + *

    Storage: open-addressed flat arrays with linear probing. Two parallel {@code + * UTF8BytesString[]} tables -- "current cycle" and "prior cycle". Capacity is the next power of two + * {@code >= 2 * cardinalityLimit} so probes stay short even at the full budget. The stored + * UTF8BytesString carries the slot's identity directly; no parallel keys array needed. + * + *

      + *
    • The current table tracks which values have consumed a slot of the cardinality budget this + * reporting cycle. Once {@link #cardinalityLimit} distinct values are present, further + * first-time values get the {@code tracer_blocked_value} sentinel. + *
    • The prior table holds the previous cycle's entries verbatim. A first-time-this-cycle value + * that hits in the prior table reuses its {@link UTF8BytesString} instance -- no + * re-allocation -- and stores that reference in the current table. + *
    + * + *

    Reset: swap the current and prior pointers, then null the (now) current. One + * O(capacity) pass; half the work of a copy-then-null. Workloads with a stable value set across + * cycles pay zero UTF8 allocations after the first cycle, and the reused instances also + * short-circuit downstream equality to identity comparisons. + */ +final class PropertyCardinalityHandler { + final String name; + private final int cardinalityLimit; + private final int capacityMask; + + /** + * Whether to substitute the {@code tracer_blocked_value} sentinel when the per-cycle budget is + * exhausted. With limits enabled (sentinel mode), overflow values collapse to one bucket; with + * limits disabled, the cache size is still bounded by {@link #cardinalityLimit} but over-budget + * values get freshly-allocated {@link UTF8BytesString}s instead, so the wire format carries the + * real value and entries don't collapse. Prior-cycle reuse runs in either mode. + */ + private final boolean useBlockedSentinel; + + // Single open-addressed table per cycle. The stored UTF8BytesString IS the slot identity -- + // equality is checked by comparing its underlying String against the incoming CharSequence. + private UTF8BytesString[] curValues; + private UTF8BytesString[] priorValues; + private int curSize; + + private UTF8BytesString cacheBlocked = null; + private String[] statsDTag = null; + + /** Accumulated block count for the current cycle. Returned and zeroed by {@link #reset()}. */ + private long blockedCount; + + /** + * Test convenience: limits-enabled mode (blocked sentinel substitution active). Production uses + * the three-argument constructor with the flag from {@code Config}. + */ + PropertyCardinalityHandler(String name, int cardinalityLimit) { + this(name, cardinalityLimit, true); + } + + PropertyCardinalityHandler(String name, int cardinalityLimit, boolean useBlockedSentinel) { + this.name = name; + if (cardinalityLimit <= 0) { + throw new IllegalArgumentException("cardinalityLimit must be positive: " + cardinalityLimit); + } + // Upper bound prevents overflow in the (cardinalityLimit * 2 - 1) capacity calc below. + // Practical limits are 8..512; this cap is well beyond any realistic configuration. + if (cardinalityLimit > (1 << 29)) { + throw new IllegalArgumentException( + "cardinalityLimit must be at most 2^29: " + cardinalityLimit); + } + this.cardinalityLimit = cardinalityLimit; + this.useBlockedSentinel = useBlockedSentinel; + // Capacity = next power of two >= 2 * cardinalityLimit. Linear-probing load factor stays + // <= 0.5 even when the budget is full, which keeps probe chains short. + final int capacity = Integer.highestOneBit(cardinalityLimit * 2 - 1) << 1; + this.capacityMask = capacity - 1; + this.curValues = new UTF8BytesString[capacity]; + this.priorValues = new UTF8BytesString[capacity]; + } + + /** + * Canonicalizes {@code value} through the cardinality budget and per-cycle reuse cache. Null + * inputs map to {@link UTF8BytesString#EMPTY} -- callers don't need to pre-check. + * + *

    Hash is computed once and reused as the probe start for both the current-cycle table and (on + * miss-with-budget) the prior-cycle table; mixing with the upper half ({@code h ^ (h >>> 16)}) + * keeps inputs sharing a low-bit pattern off the same probe chain. {@link + * UTF8BytesString#hashCode} is content-stable with the underlying String, so a String input and a + * UTF8BytesString input carrying the same content map to the same slot. + */ + UTF8BytesString register(CharSequence value) { + if (value == null) { + return UTF8BytesString.EMPTY; + } + int h = value.hashCode(); + int start = (h ^ (h >>> 16)) & this.capacityMask; + + int slot = start; + UTF8BytesString existing; + while ((existing = this.curValues[slot]) != null + && existing != value + && !existing.toString().contentEquals(value)) { + slot = (slot + 1) & this.capacityMask; + } + if (existing != null) { + // Already seen this cycle -- consumed a budget slot earlier; reuse the cached UTF8. + return existing; + } + boolean capExhausted = this.curSize >= this.cardinalityLimit; + if (capExhausted && this.useBlockedSentinel) { + this.blockedCount++; + return this.tracerBlockedValue(); + } + // Reuse from the prior cycle if possible to avoid re-allocation -- runs whether or not the + // current budget is exhausted, so persistent values keep their UTF8 instance across cycles. + int priorSlot = start; + UTF8BytesString priorMatch; + while ((priorMatch = this.priorValues[priorSlot]) != null + && priorMatch != value + && !priorMatch.toString().contentEquals(value)) { + priorSlot = (priorSlot + 1) & this.capacityMask; + } + UTF8BytesString utf8 = priorMatch != null ? priorMatch : UTF8BytesString.create(value); + if (!capExhausted) { + // Budget remaining: claim a slot for future hits this cycle. + this.curValues[slot] = utf8; + this.curSize += 1; + } + // capExhausted && !useBlockedSentinel: return the value without caching (cache is full). + // Repeat over-budget values pay the prior-cycle probe each call but skip allocation as long + // as the prior table still holds them. + return utf8; + } + + private UTF8BytesString tracerBlockedValue() { + UTF8BytesString cacheBlocked = this.cacheBlocked; + if (cacheBlocked != null) return cacheBlocked; + + this.cacheBlocked = cacheBlocked = UTF8BytesString.create("tracer_blocked_value"); + return cacheBlocked; + } + + /** + * Resets the per-cycle working set and returns the accumulated block count for this cycle. The + * caller is responsible for reporting the count to health metrics if non-zero. + */ + String[] statsDTag() { + if (statsDTag == null) { + statsDTag = new String[] {"collapsed:" + name}; + } + return statsDTag; + } + + long reset() { + long count = this.blockedCount; + this.blockedCount = 0; + // Flip pointers: the just-completed cycle becomes prior; what was prior (2 cycles ago) is + // recycled into the new (empty) current. + final UTF8BytesString[] tmp = this.priorValues; + this.priorValues = this.curValues; + this.curValues = tmp; + // Null the new current. The values pulled out of prior are still reachable through any + // AggregateEntry rows they ended up populating; this just drops the handler's references. + Arrays.fill(this.curValues, null); + this.curSize = 0; + return count; + } +} diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java index 972bd1e86ed..622a4a14cb0 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java @@ -91,7 +91,7 @@ public SerializingMetricWriter( this.buffer = new GrowableBuffer(initialCapacity); this.writer = new MsgPackWriter(buffer); this.sink = sink; - this.gitInfoProvider = new GitInfoProvider(); + this.gitInfoProvider = gitInfoProvider; } @Override @@ -151,11 +151,12 @@ public void startBucket(int metricCount, long start, long duration) { @Override public void add(AggregateEntry entry) { - // Calculate dynamic map size based on optional fields - final boolean hasHttpMethod = entry.getHttpMethod() != null; - final boolean hasHttpEndpoint = entry.getHttpEndpoint() != null; - final boolean hasServiceSource = entry.getServiceSource() != null; - final boolean hasGrpcStatusCode = entry.getGrpcStatusCode() != null; + // Dynamic map size based on optional fields; AggregateEntry encapsulates the EMPTY-as-absent + // sentinel via its hasFoo() predicates so the serializer doesn't depend on the storage choice. + final boolean hasHttpMethod = entry.hasHttpMethod(); + final boolean hasHttpEndpoint = entry.hasHttpEndpoint(); + final boolean hasServiceSource = entry.hasServiceSource(); + final boolean hasGrpcStatusCode = entry.hasGrpcStatusCode(); final int mapSize = 15 + (hasServiceSource ? 1 : 0) diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java index 152ac42bb55..8bbc6a29edb 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java @@ -1,5 +1,7 @@ package datadog.trace.common.metrics; +import javax.annotation.Nullable; + /** * Immutable per-span value posted from the producer to the aggregator thread. Carries the raw * inputs the aggregator needs to look up or build an {@link AggregateEntry} and update its @@ -22,21 +24,21 @@ final class SpanSnapshot implements InboxItem { /** * Schema for {@link #peerTagValues}. {@code null} when the span has no peer tags. The schema - * carries the names in parallel-array form; {@code peerTagValues} holds the per-span tag values - * at the same indices. + * carries the names + {@link TagCardinalityHandler}s in parallel array form; {@code + * peerTagValues} holds the per-span tag values at the same indices. */ - final PeerTagSchema peerTagSchema; + @Nullable final PeerTagSchema peerTagSchema; /** * Peer tag values captured from the span, parallel to {@code peerTagSchema.names}. A {@code null} * entry means the span didn't have that peer tag set. {@code null} (the whole array) when {@link * #peerTagSchema} is {@code null}. */ - final String[] peerTagValues; + @Nullable final String[] peerTagValues; - final String httpMethod; - final String httpEndpoint; - final String grpcStatusCode; + @Nullable final String httpMethod; + @Nullable final String httpEndpoint; + @Nullable final String grpcStatusCode; /** Duration in nanoseconds, OR-ed with {@code ERROR_TAG} / {@code TOP_LEVEL_TAG} as needed. */ final long tagAndDuration; @@ -51,11 +53,11 @@ final class SpanSnapshot implements InboxItem { boolean synthetic, boolean traceRoot, String spanKind, - PeerTagSchema peerTagSchema, - String[] peerTagValues, - String httpMethod, - String httpEndpoint, - String grpcStatusCode, + @Nullable PeerTagSchema peerTagSchema, + @Nullable String[] peerTagValues, + @Nullable String httpMethod, + @Nullable String httpEndpoint, + @Nullable String grpcStatusCode, long tagAndDuration) { this.resourceName = resourceName; this.serviceName = serviceName; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/TagCardinalityHandler.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/TagCardinalityHandler.java new file mode 100644 index 00000000000..707541c922c --- /dev/null +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/TagCardinalityHandler.java @@ -0,0 +1,153 @@ +package datadog.trace.common.metrics; + +import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.util.Arrays; + +/** + * Cardinality-capped UTF8 canonicalizer for one peer-tag name. Output is the pre-encoded {@code + * "tag:value"} form the serializer writes. + * + *

    Like {@link PropertyCardinalityHandler}, this serves a dual role -- cardinality limiter and + * UTF8 cache fused into one set of recently used values, with the prior cycle's entries retained so + * UTF8 reuse survives the per-cycle reset. See {@link PropertyCardinalityHandler} for the full + * rationale and storage layout. + * + *

    The structural difference here is that the cached {@link UTF8BytesString} holds the {@code + * "tag:value"} concatenation rather than the bare value, so a parallel {@code String[]} keys table + * is needed to probe by the raw value. + */ +final class TagCardinalityHandler { + private final String tag; + private String[] statsDTag = null; + private final int cardinalityLimit; + private final int capacityMask; + + /** See {@link PropertyCardinalityHandler}'s field of the same name. */ + private final boolean useBlockedSentinel; + + private String[] curKeys; + private UTF8BytesString[] curValues; + private String[] priorKeys; + private UTF8BytesString[] priorValues; + private int curSize; + + private UTF8BytesString cacheBlocked = null; + + /** Accumulated block count for the current cycle. Returned and zeroed by {@link #reset()}. */ + private long blockedCount; + + /** + * Test convenience: limits-enabled mode. Production uses the three-argument constructor with the + * flag from {@code Config}. + */ + TagCardinalityHandler(String tag, int cardinalityLimit) { + this(tag, cardinalityLimit, true); + } + + TagCardinalityHandler(String tag, int cardinalityLimit, boolean useBlockedSentinel) { + if (cardinalityLimit <= 0) { + throw new IllegalArgumentException("cardinalityLimit must be positive: " + cardinalityLimit); + } + // Upper bound prevents overflow in the (cardinalityLimit * 2 - 1) capacity calc below. + if (cardinalityLimit > (1 << 29)) { + throw new IllegalArgumentException( + "cardinalityLimit must be at most 2^29: " + cardinalityLimit); + } + this.tag = tag; + this.cardinalityLimit = cardinalityLimit; + this.useBlockedSentinel = useBlockedSentinel; + final int capacity = Integer.highestOneBit(cardinalityLimit * 2 - 1) << 1; + this.capacityMask = capacity - 1; + this.curKeys = new String[capacity]; + this.curValues = new UTF8BytesString[capacity]; + this.priorKeys = new String[capacity]; + this.priorValues = new UTF8BytesString[capacity]; + } + + /** + * Canonicalizes {@code value} through the cardinality budget and per-cycle reuse cache. Null + * inputs map to {@link UTF8BytesString#EMPTY} -- callers don't need to pre-check. + * + *

    Hash is computed once and reused as the probe start for both the current-cycle table and (on + * miss-with-budget) the prior-cycle table; mixing with the upper half ({@code h ^ (h >>> 16)}) + * keeps inputs sharing a low-bit pattern off the same probe chain. + */ + @SuppressFBWarnings( + value = "ES_COMPARING_PARAMETER_STRING_WITH_EQ", + justification = + "Intentional identity fast-path: the reference check short-circuits the .equals() call" + + " when the stored key and probe value are the same instance.") + UTF8BytesString register(String value) { + if (value == null) { + return UTF8BytesString.EMPTY; + } + int h = value.hashCode(); + int start = (h ^ (h >>> 16)) & this.capacityMask; + + int slot = start; + String curKey; + while ((curKey = this.curKeys[slot]) != null && curKey != value && !curKey.equals(value)) { + slot = (slot + 1) & this.capacityMask; + } + if (curKey != null) { + return this.curValues[slot]; + } + boolean capExhausted = this.curSize >= this.cardinalityLimit; + if (capExhausted && this.useBlockedSentinel) { + this.blockedCount++; + return this.tracerBlockedValue(); + } + int priorSlot = start; + String priorKey; + while ((priorKey = this.priorKeys[priorSlot]) != null + && priorKey != value + && !priorKey.equals(value)) { + priorSlot = (priorSlot + 1) & this.capacityMask; + } + UTF8BytesString utf8 = + priorKey != null + ? this.priorValues[priorSlot] + : UTF8BytesString.create(this.tag + ":" + value); + if (!capExhausted) { + this.curKeys[slot] = value; + this.curValues[slot] = utf8; + this.curSize += 1; + } + return utf8; + } + + private UTF8BytesString tracerBlockedValue() { + UTF8BytesString cacheBlocked = this.cacheBlocked; + if (cacheBlocked != null) return cacheBlocked; + + this.cacheBlocked = cacheBlocked = UTF8BytesString.create(this.tag + ":tracer_blocked_value"); + return cacheBlocked; + } + + String[] statsDTag() { + if (statsDTag == null) { + statsDTag = new String[] {"collapsed:" + tag}; + } + return statsDTag; + } + + /** + * Resets the per-cycle working set and returns the accumulated block count for this cycle. The + * caller is responsible for reporting the count to health metrics if non-zero. + */ + long reset() { + long count = this.blockedCount; + this.blockedCount = 0; + final String[] tmpKeys = this.priorKeys; + final UTF8BytesString[] tmpValues = this.priorValues; + this.priorKeys = this.curKeys; + this.priorValues = this.curValues; + this.curKeys = tmpKeys; + this.curValues = tmpValues; + Arrays.fill(this.curKeys, null); + Arrays.fill(this.curValues, null); + this.curSize = 0; + return count; + } +} diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreSpan.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreSpan.java index a6ced35967c..a4e854041b9 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreSpan.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreSpan.java @@ -82,8 +82,17 @@ default U unsafeGetTag(CharSequence name) { boolean isForceKeep(); default boolean isKind(SpanKindFilter filter) { - Object kind = unsafeGetTag(Tags.SPAN_KIND); - return filter.matches(kind == null ? null : kind.toString()); + return filter.matches(getSpanKindString()); + } + + /** + * Returns the {@code span.kind} tag value as a String, or {@code null} if not set. Default + * implementation reads the tag map; {@link DDSpan} overrides to use a cached ordinal that + * resolves via a small lookup array, skipping the tag-map lookup on the hot path. + */ + default String getSpanKindString() { + Object v = unsafeGetTag(Tags.SPAN_KIND); + return v == null ? null : v.toString(); } CharSequence getType(); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java index 375480a8792..2a26f6dbca4 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java @@ -103,7 +103,6 @@ import datadog.trace.core.propagation.PropagationTags; import datadog.trace.core.propagation.TracingPropagator; import datadog.trace.core.propagation.XRayPropagator; -import datadog.trace.core.propagation.opg.OrgGuard; import datadog.trace.core.scopemanager.ContinuableScopeManager; import datadog.trace.core.servicediscovery.ServiceDiscovery; import datadog.trace.core.servicediscovery.ServiceDiscoveryFactory; @@ -825,21 +824,11 @@ private CoreTracer( sharedCommunicationObjects.whenReady(this.dataStreamsMonitoring::start); - propagationTagsFactory = PropagationTags.factory(config); - // Register context propagators - HttpCodec.Extractor baseExtractor = + HttpCodec.Extractor tracingExtractor = extractor == null ? HttpCodec.createExtractor(config, this::captureTraceConfig) : extractor; - OrgGuard orgGuard = - OrgGuard.create( - config, - featuresDiscovery::getOrgPropagationMarker, - propagationTagsFactory, - this.healthMetrics); - HttpCodec.Extractor tracingExtractor = orgGuard.decorateExtractor(baseExtractor); - HttpCodec.Injector tracingInjector = orgGuard.decorateInjector(injector); TracingPropagator tracingPropagator = - new TracingPropagator(config.isApmTracingEnabled(), tracingInjector, tracingExtractor); + new TracingPropagator(config.isApmTracingEnabled(), injector, tracingExtractor); Propagators.register(TRACING_CONCERN, tracingPropagator); Propagators.register(XRAY_TRACING_CONCERN, new XRayPropagator(config), false); if (config.isDataStreamsEnabled()) { @@ -897,6 +886,7 @@ private CoreTracer( StatusLogger.logStatus(config); + propagationTagsFactory = PropagationTags.factory(config); this.profilingContextIntegration = profilingContextIntegration; this.injectBaggageAsTags = injectBaggageAsTags; this.injectLinksAsTags = injectLinksAsTags; diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java index 8ffcc77b49c..8ff07ed3ca4 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java @@ -965,6 +965,11 @@ public boolean isKind(SpanKindFilter filter) { return filter.matches(context.getSpanKindOrdinal()); } + @Override + public String getSpanKindString() { + return context.getSpanKindString(); + } + @Override public void copyPropagationAndBaggage(final AgentSpan source) { if (source instanceof DDSpan) { diff --git a/dd-trace-core/src/main/java/datadog/trace/core/monitor/HealthMetrics.java b/dd-trace-core/src/main/java/datadog/trace/core/monitor/HealthMetrics.java index 33e26267eaa..e506732777f 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/monitor/HealthMetrics.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/monitor/HealthMetrics.java @@ -105,6 +105,15 @@ public void onStatsAggregateDropped() {} */ public void onStatsInboxFull() {} + /** + * Reports a batch of {@code count} tag values collapsed into the {@code blocked_by_tracer} + * sentinel for {@code tag} during the just-completed reporting cycle (per-tag cardinality budget + * exhausted, or per-value length cap exceeded). Called from the aggregator thread once per + * affected tag at cycle reset, so the implementation can do a single counter update rather than + * one per blocked value. + */ + public void onTagCardinalityBlocked(String[] statsDTag, long count) {} + /** * @return Human-readable summary of the current health metrics. */ diff --git a/dd-trace-core/src/main/java/datadog/trace/core/monitor/TracerHealthMetrics.java b/dd-trace-core/src/main/java/datadog/trace/core/monitor/TracerHealthMetrics.java index 2716d19e967..f9c76ff0766 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/monitor/TracerHealthMetrics.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/monitor/TracerHealthMetrics.java @@ -31,6 +31,7 @@ public class TracerHealthMetrics extends HealthMetrics implements AutoCloseable httpStatus -> new String[] {"status:" + httpStatus}; private static final String[] NO_TAGS = new String[0]; + private static final String[] COLLAPSED_WHOLE_KEY_TAGS = new String[] {"collapsed:whole_key"}; private static final String[] STATUS_OK_TAGS = STATUS_TAGS.apply(200); private final RadixTreeCache statusTagsCache = new RadixTreeCache<>(16, 32, STATUS_TAGS, 200, 400); @@ -372,6 +373,7 @@ public void onClientStatErrorReceived() { @Override public void onStatsAggregateDropped() { statsAggregateDropped.increment(); + statsd.count("datadog.tracer.stats.collapsed_spans", 1, COLLAPSED_WHOLE_KEY_TAGS); } @Override @@ -379,6 +381,11 @@ public void onStatsInboxFull() { statsInboxFull.increment(); } + @Override + public void onTagCardinalityBlocked(String[] statsDTag, long count) { + statsd.count("datadog.tracer.stats.collapsed_spans", count, statsDTag); + } + @Override public void close() { if (null != cancellation) { diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ClientStatsAggregatorTest.groovy similarity index 88% rename from dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy rename to dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ClientStatsAggregatorTest.groovy index 00bd706b8fb..69d74b392ff 100644 --- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy +++ b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ClientStatsAggregatorTest.groovy @@ -18,7 +18,11 @@ import java.util.concurrent.TimeoutException import java.util.function.Supplier import spock.lang.Shared -class ConflatingMetricAggregatorTest extends DDSpecification { +class ClientStatsAggregatorTest extends DDSpecification { + + def setup() { + AggregateEntry.resetCardinalityHandlers() + } static Set empty = new HashSet<>() @@ -35,7 +39,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language") - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = new ClientStatsAggregator( wellKnownTags, empty, features, @@ -65,7 +69,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language") - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = new ClientStatsAggregator( wellKnownTags, [ignoredResourceName].toSet(), features, @@ -103,7 +107,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -120,7 +124,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(1, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( null, "service", "operation", @@ -151,7 +155,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -168,7 +172,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(1, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -199,7 +203,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, true) aggregator.start() @@ -222,7 +226,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { (statsComputed ? 1 : 0) * writer.startBucket(1, _, _) (statsComputed ? 1 : 0) * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -259,49 +263,37 @@ class ConflatingMetricAggregatorTest extends DDSpecification { "client" | "GET" | "/external/api" | true } - def "should create bucket for each set of peer tags"() { + def "should create separate buckets for distinct peer tag values"() { + // Peer-tag NAMES are configured per-tracer and stable for the duration of a trace publish; + // peer-tag VALUES vary per-span. Two spans with the same names but different values should + // produce two distinct aggregate buckets. setup: - // Peer-tag schema is reconciled with feature discovery once per reporting cycle (on the - // aggregator thread, in the post-report hook), not per-span on the producer. Drive two - // reporting cycles with different peerTags() configurations to verify the aggregator buckets - // each cycle by the schema that was current at publish time. MetricWriter writer = Mock(MetricWriter) Sink sink = Stub(Sink) DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true - features.peerTags() >>> [["country"], ["country", "georegion"]] - // Bump the discovered state hash so reconcile during report cycle 1 sees a mismatch and - // rebuilds the schema for span 2. Three calls: bootstrap (span1's publish), reconcile-during- - // report-1 (mismatch -> rebuild + 2nd peerTags() call), reconcile-during-report-2 (no change). - features.state() >>> ["state-1", "state-2", "state-2"] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + features.peerTags() >> ["country", "georegion"] + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() - when: "cycle 1 -- peerTags=[country]" - CountDownLatch latch1 = new CountDownLatch(1) + when: + CountDownLatch latch = new CountDownLatch(1) aggregator.publish([ new SimpleSpan("service", "operation", "resource", "type", true, false, false, 0, 100, HTTP_OK) - .setTag(SPAN_KIND, "client").setTag("country", "france").setTag("georegion", "europe") - ]) - aggregator.report() - def cycle1Triggered = latch1.await(2, SECONDS) - - and: "cycle 2 -- reconcile picks up peerTags=[country, georegion]" - CountDownLatch latch2 = new CountDownLatch(1) - aggregator.publish([ + .setTag(SPAN_KIND, "client").setTag("country", "france").setTag("georegion", "europe"), new SimpleSpan("service", "operation", "resource", "type", true, false, false, 0, 100, HTTP_OK) - .setTag(SPAN_KIND, "client").setTag("country", "france").setTag("georegion", "europe") + .setTag(SPAN_KIND, "client").setTag("country", "germany").setTag("georegion", "europe") ]) aggregator.report() - def cycle2Triggered = latch2.await(2, SECONDS) + def latchTriggered = latch.await(2, SECONDS) then: - cycle1Triggered - cycle2Triggered + latchTriggered + 1 * writer.startBucket(2, _, _) 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -311,7 +303,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { false, false, "client", - [UTF8BytesString.create("country:france")], + [UTF8BytesString.create("country:france"), UTF8BytesString.create("georegion:europe")], null, null, null @@ -321,7 +313,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { } 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -331,7 +323,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { false, false, "client", - [UTF8BytesString.create("country:france"), UTF8BytesString.create("georegion:europe")], + [UTF8BytesString.create("country:germany"), UTF8BytesString.create("georegion:europe")], null, null, null @@ -339,7 +331,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { }) >> { AggregateEntry e -> assert e.getHitCount() == 1 && e.getTopLevelCount() == 0 && e.getDuration() == 100 } - 2 * writer.finishBucket() >> { latch1.countDown(); latch2.countDown() } + 1 * writer.finishBucket() >> { latch.countDown() } cleanup: aggregator.close() @@ -352,7 +344,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> ["peer.hostname", "_dd.base_service"] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -370,7 +362,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { 1 * writer.startBucket(1, _, _) 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -407,7 +399,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, features, HealthMetrics.NO_OP, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -424,7 +416,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(1, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -461,7 +453,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) long duration = 100 List trace = [ @@ -485,7 +477,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { 1 * writer.finishBucket() >> { latch.countDown() } 1 * writer.startBucket(2, _, SECONDS.toNanos(reportingInterval)) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -504,7 +496,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == count && e.getDuration() == count * duration } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource2", "service2", "operation2", @@ -537,7 +529,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, true) aggregator.start() @@ -560,7 +552,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(1, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -603,7 +595,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered2 1 * writer.startBucket(3, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -622,7 +614,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == 1 && e.getDuration() == duration } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -641,7 +633,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == 1 && e.getDuration() == duration * 2 } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -672,7 +664,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, true) aggregator.start() @@ -707,7 +699,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(4, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -726,7 +718,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == 1 && e.getDuration() == duration } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -745,7 +737,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == 1 && e.getDuration() == duration * 2 } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -764,7 +756,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == 1 && e.getDuration() == duration * 3 } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -795,7 +787,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, true) aggregator.start() @@ -819,7 +811,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(2, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -838,7 +830,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == 1 && e.getDuration() == duration } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -869,7 +861,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -891,7 +883,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(2, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -910,7 +902,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { assert e.getHitCount() == 2 && e.getDuration() == 2 * duration } 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service", "operation", @@ -945,7 +937,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, maxAggregates, queueSize, reportingInterval, SECONDS, false) long duration = 100 aggregator.start() @@ -965,7 +957,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(10, _, SECONDS.toNanos(reportingInterval)) for (int i = 0; i < 10; ++i) { - def expected = AggregateEntryTestUtils.of( + def expected = AggregateEntry.of( "resource", "service" + i, "operation", @@ -984,7 +976,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { } } 0 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service10", "operation", @@ -1015,7 +1007,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { features.supportsMetrics() >> true features.peerTags() >> [] HealthMetrics healthMetrics = Mock(HealthMetrics) - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, healthMetrics, sink, writer, maxAggregates, queueSize, reportingInterval, SECONDS, false) long duration = 100 aggregator.start() @@ -1049,7 +1041,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { features.supportsMetrics() >> true features.peerTags() >> [] HealthMetrics healthMetrics = Mock(HealthMetrics) - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, healthMetrics, sink, writer, maxAggregates, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -1094,7 +1086,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, maxAggregates, queueSize, reportingInterval, SECONDS, false) long duration = 100 aggregator.start() @@ -1114,7 +1106,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(5, _, SECONDS.toNanos(reportingInterval)) for (int i = 0; i < 5; ++i) { - def expected = AggregateEntryTestUtils.of( + def expected = AggregateEntry.of( "resource", "service" + i, "operation", @@ -1149,7 +1141,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(4, _, SECONDS.toNanos(reportingInterval)) for (int i = 1; i < 5; ++i) { - def expected = AggregateEntryTestUtils.of( + def expected = AggregateEntry.of( "resource", "service" + i, "operation", @@ -1168,7 +1160,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { } } 0 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "resource", "service0", "operation", @@ -1198,7 +1190,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, maxAggregates, queueSize, reportingInterval, SECONDS, false) long duration = 100 aggregator.start() @@ -1218,7 +1210,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(5, _, SECONDS.toNanos(reportingInterval)) for (int i = 0; i < 5; ++i) { - def expected = AggregateEntryTestUtils.of( + def expected = AggregateEntry.of( "resource", "service" + i, "operation", @@ -1258,7 +1250,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, maxAggregates, queueSize, 1, SECONDS, false) long duration = 100 aggregator.start() @@ -1277,7 +1269,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(5, _, SECONDS.toNanos(1)) for (int i = 0; i < 5; ++i) { - def expected = AggregateEntryTestUtils.of( + def expected = AggregateEntry.of( "resource", "service" + i, "operation", @@ -1309,7 +1301,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, maxAggregates, queueSize, 1, SECONDS, false) long duration = 100 aggregator.start() @@ -1340,7 +1332,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { MetricWriter writer = Mock(MetricWriter) Sink sink = Stub(Sink) DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, maxAggregates, queueSize, 1, SECONDS, false) aggregator.start() @@ -1362,7 +1354,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> false features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, 200, MILLISECONDS, false) final spans = [ new SimpleSpan("service", "operation", "resource", "type", false, true, false, 0, 10, HTTP_OK) @@ -1394,7 +1386,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { Sink sink = Stub(Sink) DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, maxAggregates, queueSize, 1, SECONDS, false) when: @@ -1427,7 +1419,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { Sink sink = Stub(Sink) DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -1445,7 +1437,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { 1 * writer.startBucket(1, _, _) 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -1476,7 +1468,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -1502,7 +1494,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { 1 * writer.startBucket(1, _, _) 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -1533,7 +1525,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, true) aggregator.start() @@ -1559,7 +1551,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { 1 * writer.startBucket(3, _, _) 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -1579,7 +1571,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { } 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -1599,7 +1591,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { } 1 * writer.add({ AggregateEntryTestUtils.equals(it, - AggregateEntryTestUtils.of( + AggregateEntry.of( "resource", "service", "operation", @@ -1630,7 +1622,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { DDAgentFeaturesDiscovery features = Mock(DDAgentFeaturesDiscovery) features.supportsMetrics() >> true features.peerTags() >> [] - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator(empty, + ClientStatsAggregator aggregator = new ClientStatsAggregator(empty, features, HealthMetrics.NO_OP, sink, writer, 10, queueSize, reportingInterval, SECONDS, false) aggregator.start() @@ -1653,7 +1645,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { latchTriggered 1 * writer.startBucket(3, _, _) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "grpc.service/Method", "service", "grpc.server", @@ -1670,7 +1662,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { )) }) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "grpc.service/Method", "service", "grpc.server", @@ -1687,7 +1679,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification { )) }) 1 * writer.add({ - AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of( + AggregateEntryTestUtils.equals(it,AggregateEntry.of( "GET /api", "service", "http.request", @@ -1709,14 +1701,14 @@ class ConflatingMetricAggregatorTest extends DDSpecification { aggregator.close() } - def reportAndWaitUntilEmpty(ConflatingMetricsAggregator aggregator) { + def reportAndWaitUntilEmpty(ClientStatsAggregator aggregator) { waitUntilEmpty(aggregator) aggregator.report() waitUntilEmpty(aggregator) } - def waitUntilEmpty(ConflatingMetricsAggregator aggregator) { + def waitUntilEmpty(ClientStatsAggregator aggregator) { int i = 0 while (!aggregator.inbox.isEmpty() && i++ < 100) { Thread.sleep(10) diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/FootprintForkedTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/FootprintForkedTest.groovy index eceedeb1935..86a91c23b3f 100644 --- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/FootprintForkedTest.groovy +++ b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/FootprintForkedTest.groovy @@ -37,7 +37,7 @@ class FootprintForkedTest extends DDSpecification { it.supportsMetrics() >> true it.peerTags() >> [] } - ConflatingMetricsAggregator aggregator = new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = new ClientStatsAggregator( new WellKnownTags("runtimeid","hostname", "env", "service", "version","language"), [].toSet() as Set, features, diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/MetricsAggregatorFactoryTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/MetricsAggregatorFactoryTest.groovy index 07f246bf9a9..dc9eb86fde3 100644 --- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/MetricsAggregatorFactoryTest.groovy +++ b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/MetricsAggregatorFactoryTest.groovy @@ -28,6 +28,6 @@ class MetricsAggregatorFactoryTest extends DDSpecification { expect: def aggregator = MetricsAggregatorFactory.createMetricsAggregator(config, sco, HealthMetrics.NO_OP, ) - assert aggregator instanceof ConflatingMetricsAggregator + assert aggregator instanceof ClientStatsAggregator } } diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy index cc0880bc30a..080a77238e4 100644 --- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy +++ b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy @@ -181,7 +181,7 @@ class SerializingMetricWriterTest extends DDSpecification { def content = [e] - ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content) + ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content, shaCommit) SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128, gitInfoProvider) when: @@ -231,13 +231,15 @@ class SerializingMetricWriterTest extends DDSpecification { private final long duration private boolean validated = false private List content + private final String expectedGitCommitSha ValidatingSink(WellKnownTags wellKnownTags, long startTimeNanos, long duration, - List content) { + List content, String expectedGitCommitSha = null) { this.wellKnownTags = wellKnownTags this.startTimeNanos = startTimeNanos this.duration = duration this.content = content + this.expectedGitCommitSha = expectedGitCommitSha } @Override @@ -248,7 +250,7 @@ class SerializingMetricWriterTest extends DDSpecification { void accept(int messageCount, ByteBuffer buffer) { MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(buffer) int mapSize = unpacker.unpackMapHeader() - String gitCommitSha = GitInfoProvider.INSTANCE.getGitInfo()?.getCommit()?.getSha() + String gitCommitSha = expectedGitCommitSha assert mapSize == (7 + (Config.get().isExperimentalPropagateProcessTagsEnabled() ? 1 : 0) + (gitCommitSha != null ? 1 : 0)) assert unpacker.unpackString() == "RuntimeID" @@ -283,12 +285,13 @@ class SerializingMetricWriterTest extends DDSpecification { int statCount = unpacker.unpackArrayHeader() assert statCount == content.size() for (AggregateEntry entry : content) { + // counters now live on AggregateEntry int metricMapSize = unpacker.unpackMapHeader() // Calculate expected map size based on optional fields - boolean hasHttpMethod = entry.getHttpMethod() != null - boolean hasHttpEndpoint = entry.getHttpEndpoint() != null - boolean hasServiceSource = entry.getServiceSource() != null - boolean hasGrpcStatusCode = entry.getGrpcStatusCode() != null + boolean hasHttpMethod = entry.hasHttpMethod() + boolean hasHttpEndpoint = entry.hasHttpEndpoint() + boolean hasServiceSource = entry.hasServiceSource() + boolean hasGrpcStatusCode = entry.hasGrpcStatusCode() int expectedMapSize = 15 + (hasServiceSource ? 1 : 0) + (hasHttpMethod ? 1 : 0) + (hasHttpEndpoint ? 1 : 0) + (hasGrpcStatusCode ? 1 : 0) assert metricMapSize == expectedMapSize int elementCount = 0 diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java index 7fd767533c7..7d1aa023a69 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java @@ -1,10 +1,11 @@ package datadog.trace.common.metrics; +import static datadog.trace.bootstrap.instrumentation.api.UTF8BytesString.EMPTY; import static datadog.trace.common.metrics.AggregateEntry.ERROR_TAG; import static datadog.trace.common.metrics.AggregateEntry.TOP_LEVEL_TAG; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; import datadog.metrics.agent.AgentMeter; @@ -84,82 +85,46 @@ void okAndErrorLatenciesTrackedSeparately() { } @Test - void testUtilsEqualsIsConsistentWithHashCodeAcrossDifferentSchemaLayouts() { - // Contract test for AggregateEntryTestUtils (the test-side equality helper used by Spock - // mock matchers). Production AggregateEntry has no equals override. - // - // Two entries with identical encoded peerTags but different raw layouts must not be equal, - // because hashOf folds in the raw arrays. Equality on the encoded list would let them - // collapse while their hashCodes differ -- violating the contract. - // - // A: schema ["a","b"], values [null,"x"] -> encoded ["b:x"] - // B: schema ["b","c"], values ["x",null] -> encoded ["b:x"] - AggregateEntry a = - AggregateEntryTestUtils.forSnapshot( - snapshotWithPeerTags(new String[] {"a", "b"}, new String[] {null, "x"})); - AggregateEntry b = - AggregateEntryTestUtils.forSnapshot( - snapshotWithPeerTags(new String[] {"b", "c"}, new String[] {"x", null})); - - // Sanity: same encoded peer tags, despite different raw layout. - assertEquals(a.getPeerTags(), b.getPeerTags()); - - // Different raw layouts -> entries must not be equal via the test helper. - assertFalse(AggregateEntryTestUtils.equals(a, b)); - // And different hashCodes (matching the inequality). - assertNotEquals(AggregateEntryTestUtils.hashCode(a), AggregateEntryTestUtils.hashCode(b)); + void absentOptionalFieldsResolveToEmptySentinel() { + // serviceSource / httpMethod / httpEndpoint / grpcStatusCode = null on input -> EMPTY on the + // entry. EMPTY is the universal "absent" sentinel; SerializingMetricWriter and equality use + // identity comparison against it. + AggregateEntry entry = newEntry(); + assertSame(EMPTY, entry.getServiceSource()); + assertSame(EMPTY, entry.getHttpMethod()); + assertSame(EMPTY, entry.getHttpEndpoint()); + assertSame(EMPTY, entry.getGrpcStatusCode()); } @Test - void testUtilsEqualEntriesHaveEqualHashCodes() { - AggregateEntry a = - AggregateEntryTestUtils.forSnapshot( - snapshotWithPeerTags(new String[] {"a", "b"}, new String[] {null, "x"})); - AggregateEntry b = - AggregateEntryTestUtils.forSnapshot( - snapshotWithPeerTags(new String[] {"a", "b"}, new String[] {null, "x"})); - - assertTrue(AggregateEntryTestUtils.equals(a, b)); - assertEquals(AggregateEntryTestUtils.hashCode(a), AggregateEntryTestUtils.hashCode(b)); - } - - private static SpanSnapshot snapshotWithPeerTags(String[] names, String[] values) { - return new SpanSnapshot( - "resource", - "svc", - "op", - null, - "type", - (short) 200, - false, - true, - "client", - PeerTagSchema.testSchema(names), - values, - null, - null, - null, - 0L); - } - - private static AggregateEntry newEntry() { - SpanSnapshot snapshot = - new SpanSnapshot( + void presentOptionalFieldsCarryTheirValue() { + AggregateEntry entry = + AggregateEntry.of( "resource", "svc", "op", - null, + "src", "type", - (short) 200, + 200, false, true, "client", null, - null, - null, - null, - null, - 0L); - return AggregateEntryTestUtils.forSnapshot(snapshot); + "GET", + "/api/v1/foo", + "0"); + assertNotSame(EMPTY, entry.getServiceSource()); + assertNotSame(EMPTY, entry.getHttpMethod()); + assertNotSame(EMPTY, entry.getHttpEndpoint()); + assertNotSame(EMPTY, entry.getGrpcStatusCode()); + assertEquals("src", entry.getServiceSource().toString()); + assertEquals("GET", entry.getHttpMethod().toString()); + assertEquals("/api/v1/foo", entry.getHttpEndpoint().toString()); + assertEquals("0", entry.getGrpcStatusCode().toString()); + } + + private static AggregateEntry newEntry() { + return AggregateEntry.of( + "resource", "svc", "op", null, "type", 200, false, true, "client", null, null, null, null); } } diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java index ed6fd5a3a7e..ad40a2a0b67 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java @@ -1,7 +1,6 @@ package datadog.trace.common.metrics; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; -import java.util.Arrays; import java.util.List; import java.util.Objects; import javax.annotation.Nullable; @@ -10,34 +9,25 @@ * Test-side helpers for {@link AggregateEntry}: a positional-args fixture factory plus a field-wise * equality contract for use with Spock mock argument matchers and JUnit assertions. Lives in {@code * src/test} so the production class stays free of test-only API; same {@code - * datadog.trace.common.metrics} package so this helper can reach package-private fields and - * constructors. + * datadog.trace.common.metrics} package so this helper can reach package-private members. * *

    Production {@code AggregateEntry} intentionally has no {@code equals}/{@code hashCode} - * override -- {@link AggregateTable} bucketing goes through {@link AggregateEntry#matches} keyed on - * {@link AggregateEntry#keyHash}, and no production code path invokes {@link Object#equals}. + * override -- {@link AggregateTable} bucketing goes through the {@code Canonical} scratch buffer + * keyed on {@link AggregateEntry#keyHash}, and no production code path invokes {@link + * Object#equals}. * - *

    The equality helper compares the raw {@code peerTagNames}/{@code peerTagValues} arrays (not - * the encoded {@code peerTags} list) so it stays consistent with {@link AggregateEntry#hashOf}, - * which folds in raw arrays via {@link PeerTagSchema#hashCode()} and {@link - * Arrays#hashCode(Object[])}. Comparing the encoded list would let two entries with different raw - * layouts (e.g. tag {@code "b"} at index 1 in schema A vs index 0 in schema B, with matching - * values) collapse to the same encoded form -- a real bug surfaced during PR #11382 review. + *

    On this branch, peer tags live as a single pre-encoded {@code List} on the + * entry (canonicalization through {@link PeerTagSchema#register} already collapsed identical + * values), so equality compares the list directly. The hash side (computed in {@link + * AggregateEntry#hashOf}) folds in the encoded list, so the contract stays consistent. */ public final class AggregateEntryTestUtils { private AggregateEntryTestUtils() {} /** * Builds an {@link AggregateEntry} from the same positional shape the prior {@code new - * MetricKey(...)} took. Accepts a pre-encoded {@code List} of {@code - * "name:value"} peer tags and recovers the parallel-array {@code (names, values)} form by - * splitting on the {@code ':'} delimiter. - * - *

    Test-only. The split is at the first {@code ':'}, so peer-tag values - * containing a colon (URLs, IPv6 addresses, {@code service:env} patterns) will be silently - * misparsed and the recovered (name, value) pair will be wrong. Keep test data colon-free in - * peer-tag values, or wire a production-style snapshot through {@link #forSnapshot(SpanSnapshot)} - * directly instead. + * MetricKey(...)} took. Passes through to {@link AggregateEntry#of} without touching the + * cardinality handlers. */ public static AggregateEntry of( CharSequence resource, @@ -53,49 +43,20 @@ public static AggregateEntry of( @Nullable CharSequence httpMethod, @Nullable CharSequence httpEndpoint, @Nullable CharSequence grpcStatusCode) { - PeerTagSchema schema = null; - String[] values = null; - if (peerTags != null && !peerTags.isEmpty()) { - String[] names = new String[peerTags.size()]; - values = new String[peerTags.size()]; - int i = 0; - for (UTF8BytesString t : peerTags) { - String s = t.toString(); - int colon = s.indexOf(':'); - names[i] = colon < 0 ? s : s.substring(0, colon); - values[i] = colon < 0 ? "" : s.substring(colon + 1); - i++; - } - schema = PeerTagSchema.testSchema(names); - } - SpanSnapshot syntheticSnapshot = - new SpanSnapshot( - resource, - service == null ? null : service.toString(), - operationName, - serviceSource, - type, - (short) httpStatusCode, - synthetic, - traceRoot, - spanKind == null ? null : spanKind.toString(), - schema, - values, - httpMethod == null ? null : httpMethod.toString(), - httpEndpoint == null ? null : httpEndpoint.toString(), - grpcStatusCode == null ? null : grpcStatusCode.toString(), - 0L); - return forSnapshot(syntheticSnapshot); - } - - /** - * Builds an {@link AggregateEntry} from {@code s} by computing its lookup hash via {@link - * AggregateEntry#hashOf(SpanSnapshot)} and calling the package-private constructor directly. - * Production callers route through {@link AggregateTable#findOrInsert} which already has the - * {@code keyHash} on hand; tests rarely do, so this helper hides the second argument. - */ - public static AggregateEntry forSnapshot(SpanSnapshot s) { - return new AggregateEntry(s, AggregateEntry.hashOf(s)); + return AggregateEntry.of( + resource, + service, + operationName, + serviceSource, + type, + httpStatusCode, + synthetic, + traceRoot, + spanKind, + peerTags, + httpMethod, + httpEndpoint, + grpcStatusCode); } /** @@ -114,8 +75,7 @@ public static boolean equals(AggregateEntry a, AggregateEntry b) { && Objects.equals(a.getServiceSource(), b.getServiceSource()) && Objects.equals(a.getType(), b.getType()) && Objects.equals(a.getSpanKind(), b.getSpanKind()) - && Arrays.equals(a.peerTagNames, b.peerTagNames) - && Arrays.equals(a.peerTagValues, b.peerTagValues) + && a.getPeerTags().equals(b.getPeerTags()) && Objects.equals(a.getHttpMethod(), b.getHttpMethod()) && Objects.equals(a.getHttpEndpoint(), b.getHttpEndpoint()) && Objects.equals(a.getGrpcStatusCode(), b.getGrpcStatusCode()); @@ -123,8 +83,8 @@ public static boolean equals(AggregateEntry a, AggregateEntry b) { /** * Stable hash matching {@link #equals(AggregateEntry, AggregateEntry)} -- derived from {@link - * AggregateEntry#keyHash}, which {@link AggregateEntry#hashOf} computes from the same raw fields - * the helper's {@code equals} compares. + * AggregateEntry#keyHash}, which {@link AggregateEntry#hashOf} computes from the same fields the + * helper's {@code equals} compares. */ public static int hashCode(AggregateEntry e) { return e == null ? 0 : (int) e.keyHash; diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java index 618ead2ab43..8694892ea84 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java @@ -19,6 +19,7 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class AggregateTableTest { @@ -31,6 +32,15 @@ static void initAgentMeter() { monitoring.newTimer("test.init"); } + @BeforeEach + void resetCardinalityHandlers() { + // AggregateEntry's property handlers are static and accumulate state across tests. Some tests + // in this class (e.g. backToBackEvictionsAllSucceed) drive 40 distinct services, which exceeds + // MetricCardinalityLimits.SERVICE (32) and leaves later tests seeing a shared "blocked" + // canonical for "a"/"b"/"c"-style services -- collapsing distinct snapshots into one entry. + AggregateEntry.resetCardinalityHandlers(); + } + @Test void insertOnMissReturnsNewAggregate() { AggregateTable table = new AggregateTable(8); diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/CardinalityHandlerTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/CardinalityHandlerTest.java new file mode 100644 index 00000000000..bf340078925 --- /dev/null +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/CardinalityHandlerTest.java @@ -0,0 +1,227 @@ +package datadog.trace.common.metrics; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; + +import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; +import org.junit.jupiter.api.Test; + +class CardinalityHandlerTest { + + @Test + void propertyReturnsSameInstanceForRepeatedValueUntilLimit() { + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 3); + UTF8BytesString a1 = h.register("a"); + UTF8BytesString a2 = h.register("a"); + assertSame(a1, a2); + assertEquals("a", a1.toString()); + } + + @Test + void propertyOverLimitReturnsBlockedSentinel() { + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 2); + UTF8BytesString a = h.register("a"); + UTF8BytesString b = h.register("b"); + UTF8BytesString blocked1 = h.register("c"); + UTF8BytesString blocked2 = h.register("d"); + + assertEquals("tracer_blocked_value", blocked1.toString()); + assertSame(blocked1, blocked2); // same sentinel for all overflow values + assertNotSame(blocked1, a); + assertNotSame(blocked1, b); + } + + @Test + void propertyResetRefreshesBudget() { + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 2); + h.register("a"); + h.register("b"); + UTF8BytesString blocked = h.register("c"); + assertEquals("tracer_blocked_value", blocked.toString()); + + h.reset(); + + // After reset, three distinct values fit again. Prior-cycle instances are reused + // (see propertyPriorCycleInstancesAreReusedAcrossReset for the dedicated check); here + // we just confirm that the budget refreshed so values previously blocked now have + // a slot. + UTF8BytesString afterReset = h.register("a"); + assertEquals("a", afterReset.toString()); + UTF8BytesString c = h.register("c"); + assertEquals("c", c.toString()); + UTF8BytesString blockedAgain = h.register("d"); + UTF8BytesString blockedYetAgain = h.register("e"); + assertEquals("tracer_blocked_value", blockedAgain.toString()); + assertSame(blockedAgain, blockedYetAgain); + } + + @Test + void propertyPriorCycleInstancesAreReusedAcrossReset() { + // Dual role: the handler is also a UTF8 cache. Values held in the prior cycle are + // reused on the first registration in the new cycle, so aggregate entries that hold a + // reference to a UTF8BytesString still match on identity after the per-cycle reset. + // This is the cache-survives-reset property the canonical-key lookup depends on. + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 4); + UTF8BytesString aBefore = h.register("a"); + UTF8BytesString bBefore = h.register("b"); + + h.reset(); + + assertSame(aBefore, h.register("a")); + assertSame(bBefore, h.register("b")); + // Same-cycle subsequent registration continues to return the reused instance. + assertSame(aBefore, h.register("a")); + } + + @Test + void propertyPriorCycleReuseSurvivesOneResetButNotTwo() { + // Reuse window is one cycle deep -- the handler swaps current/prior on reset, so a + // value last seen two cycles ago is no longer cached and will be re-allocated. + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 4); + UTF8BytesString first = h.register("a"); + + h.reset(); + h.reset(); + + UTF8BytesString afterTwoResets = h.register("a"); + assertNotSame(first, afterTwoResets); + assertEquals("a", afterTwoResets.toString()); + } + + @Test + void tagPrefixesValuesAndReusesUnderLimit() { + TagCardinalityHandler h = new TagCardinalityHandler("peer.hostname", 4); + UTF8BytesString first = h.register("host-a"); + UTF8BytesString second = h.register("host-a"); + UTF8BytesString other = h.register("host-b"); + + assertSame(first, second); + assertNotSame(first, other); + assertEquals("peer.hostname:host-a", first.toString()); + assertEquals("peer.hostname:host-b", other.toString()); + } + + @Test + void tagOverLimitReturnsTaggedSentinel() { + TagCardinalityHandler h = new TagCardinalityHandler("peer.service", 1); + h.register("svc-1"); + UTF8BytesString blocked = h.register("svc-2"); + assertEquals("peer.service:tracer_blocked_value", blocked.toString()); + } + + @Test + void tagResetRefreshesBudgetAndSentinelStaysStable() { + TagCardinalityHandler h = new TagCardinalityHandler("x", 1); + h.register("v1"); + UTF8BytesString blockedBefore = h.register("v2"); + h.reset(); + h.register("v1"); + UTF8BytesString blockedAfter = h.register("v2"); + // Both are the same sentinel instance (cacheBlocked is not cleared on reset). + assertSame(blockedBefore, blockedAfter); + } + + @Test + void tagPriorCycleInstancesAreReusedAcrossReset() { + // Mirrors propertyPriorCycleInstancesAreReusedAcrossReset: the pre-built "tag:value" + // UTF8BytesString from the prior cycle is reused on the first registration in the new + // cycle -- no re-concatenation, no re-encoding. + TagCardinalityHandler h = new TagCardinalityHandler("peer.hostname", 4); + UTF8BytesString hostABefore = h.register("host-a"); + UTF8BytesString hostBBefore = h.register("host-b"); + + h.reset(); + + assertSame(hostABefore, h.register("host-a")); + assertSame(hostBBefore, h.register("host-b")); + } + + @Test + void propertyRegisterOfNullReturnsEmpty() { + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 4); + // Null input short-circuits to UTF8BytesString.EMPTY -- the universal "absent" sentinel that + // AggregateEntry's optional UTF8 fields use in place of null. + assertSame(UTF8BytesString.EMPTY, h.register(null)); + } + + @Test + void propertyRegisterOfNullDoesNotConsumeBudget() { + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 2); + h.register(null); + h.register(null); + h.register(null); + // Three null registrations didn't consume the budget; two real values still fit. + assertEquals("a", h.register("a").toString()); + assertEquals("b", h.register("b").toString()); + // Third real value spills to the blocked sentinel (limit = 2). + assertEquals("tracer_blocked_value", h.register("c").toString()); + } + + @Test + void tagRegisterOfNullReturnsEmpty() { + TagCardinalityHandler h = new TagCardinalityHandler("peer.hostname", 4); + // Null returns EMPTY (no "tag:" prefix applied -- the sentinel is the same EMPTY singleton + // every handler returns for null input). + assertSame(UTF8BytesString.EMPTY, h.register(null)); + } + + // ---- limits-disabled mode (Config flag off): cache size still capped, but over-cap values + // get freshly-allocated UTF8 rather than the blocked sentinel. + + @Test + void propertyOverLimitWithSentinelDisabledReturnsFreshUtf8() { + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 2, false); + UTF8BytesString a = h.register("a"); + UTF8BytesString b = h.register("b"); + UTF8BytesString c = h.register("c"); + UTF8BytesString d = h.register("d"); + + // Real values (not the "tracer_blocked_value" sentinel) so the wire format carries them. + assertEquals("c", c.toString()); + assertEquals("d", d.toString()); + // The first two stay cached and identity-stable. + assertSame(a, h.register("a")); + assertSame(b, h.register("b")); + // Over-cap values are NOT cached -- a second call allocates a fresh instance. + assertNotSame(c, h.register("c")); + assertEquals("c", h.register("c").toString()); + } + + @Test + void propertyOverLimitWithSentinelDisabledReusesPriorCycleInstances() { + // Prior-cycle reuse runs in disabled mode too: a value that was seen last cycle but is now + // over-budget still gets its prior-cycle UTF8BytesString back instead of an allocation. + PropertyCardinalityHandler h = new PropertyCardinalityHandler("test", 2, false); + UTF8BytesString cBeforeReset = h.register("c"); + + h.reset(); + + // Fill the budget with two different values so "c" lands over-cap. + h.register("x"); + h.register("y"); + UTF8BytesString cAfterReset = h.register("c"); + assertSame(cBeforeReset, cAfterReset); + } + + @Test + void tagOverLimitWithSentinelDisabledReturnsFreshUtf8() { + TagCardinalityHandler h = new TagCardinalityHandler("peer.hostname", 1, false); + h.register("host-a"); + UTF8BytesString hostB = h.register("host-b"); + UTF8BytesString hostC = h.register("host-c"); + + assertEquals("peer.hostname:host-b", hostB.toString()); + assertEquals("peer.hostname:host-c", hostC.toString()); + } + + @Test + void tagOverLimitWithSentinelDisabledNeverSubstitutesBlockedSentinel() { + // The sentinel should never materialize in disabled mode -- over-cap values carry their real + // "tag:value" content rather than the blocked sentinel. + TagCardinalityHandler h = new TagCardinalityHandler("peer.service", 1, false); + h.register("svc-1"); + UTF8BytesString overCap = h.register("svc-2"); + assertEquals("peer.service:svc-2", overCap.toString()); + } +} diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBootstrapTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorBootstrapTest.java similarity index 95% rename from dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBootstrapTest.java rename to dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorBootstrapTest.java index 59681d4724e..758f5db9910 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBootstrapTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorBootstrapTest.java @@ -4,7 +4,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -12,7 +11,6 @@ import static org.mockito.Mockito.when; import datadog.communication.ddagent.DDAgentFeaturesDiscovery; -import datadog.trace.bootstrap.instrumentation.api.Tags; import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString; import datadog.trace.core.CoreSpan; import datadog.trace.core.SpanKindFilter; @@ -26,8 +24,7 @@ import org.mockito.ArgumentCaptor; /** - * Coverage for the {@code ConflatingMetricsAggregator} peer-tag schema bootstrap and reconcile - * paths. + * Coverage for the {@code ClientStatsAggregator} peer-tag schema bootstrap and reconcile paths. * *

      *
    • {@link #bootstrapHappensOnceOnFirstPublish()} -- verifies the synchronized producer-side @@ -43,7 +40,7 @@ * publishes see the new tags. *
    */ -class ConflatingMetricsAggregatorBootstrapTest { +class ClientStatsAggregatorBootstrapTest { @Test void bootstrapHappensOnceOnFirstPublish() { @@ -57,8 +54,8 @@ void bootstrapHappensOnceOnFirstPublish() { when(features.peerTags()).thenReturn(Collections.singleton("peer.hostname")); when(features.state()).thenReturn("state-1"); - ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = + new ClientStatsAggregator( Collections.emptySet(), features, healthMetrics, @@ -94,8 +91,8 @@ void reconcileSkipsDeepCompareWhenStateMatches() throws Exception { when(features.peerTags()).thenReturn(Collections.singleton("peer.hostname")); when(features.state()).thenReturn("state-1"); - ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = + new ClientStatsAggregator( Collections.emptySet(), features, healthMetrics, @@ -163,8 +160,8 @@ void reconcileSurvivesStateChangeWhenTagsUnchanged() throws Exception { // State hash changes every reconcile -- forces reconcile into the slow path each time. when(features.state()).thenReturn("state-1", "state-2", "state-3"); - ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = + new ClientStatsAggregator( Collections.emptySet(), features, healthMetrics, @@ -236,8 +233,8 @@ void reconcileSwapsSchemaWhenTagSetChanges() throws Exception { // (mismatch -> slow path), stable at "state-2" for cycle 2's reconcile (match -> fast path). when(features.state()).thenReturn("state-1", "state-2", "state-2"); - ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = + new ClientStatsAggregator( Collections.emptySet(), features, healthMetrics, @@ -324,7 +321,7 @@ private static CoreSpan peerAggregationSpan() { when(span.getHttpStatusCode()).thenReturn((short) 200); when(span.getParentId()).thenReturn(0L); when(span.getOrigin()).thenReturn(null); - when(span.unsafeGetTag(eq(Tags.SPAN_KIND), any(CharSequence.class))).thenReturn("client"); + when(span.getSpanKindString()).thenReturn("client"); // peer.hostname tag is set so capturePeerTagValues fires for the bootstrapped schema. when(span.unsafeGetTag("peer.hostname")).thenReturn("localhost"); return span; diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDisableTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorDisableTest.java similarity index 95% rename from dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDisableTest.java rename to dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorDisableTest.java index d072371d25d..3239cca4e90 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDisableTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorDisableTest.java @@ -6,7 +6,6 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.after; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; @@ -15,7 +14,6 @@ import static org.mockito.Mockito.when; import datadog.communication.ddagent.DDAgentFeaturesDiscovery; -import datadog.trace.bootstrap.instrumentation.api.Tags; import datadog.trace.core.CoreSpan; import datadog.trace.core.SpanKindFilter; import datadog.trace.core.monitor.HealthMetrics; @@ -45,7 +43,7 @@ * (the pre-fix path) only via races -- not deterministically -- so the assertions here are about * the observable end-to-end shape rather than thread identity. */ -class ConflatingMetricsAggregatorDisableTest { +class ClientStatsAggregatorDisableTest { @Test void downgradeRoutesClearThroughInboxBeforeNextReport() throws Exception { @@ -57,8 +55,8 @@ void downgradeRoutesClearThroughInboxBeforeNextReport() throws Exception { when(features.peerTags()).thenReturn(Collections.emptySet()); when(features.state()).thenReturn("state-1"); - ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = + new ClientStatsAggregator( Collections.emptySet(), features, healthMetrics, @@ -147,8 +145,8 @@ void clearDoesNotTrampleQueuedStopSignal() throws Exception { when(features.peerTags()).thenReturn(Collections.emptySet()); when(features.state()).thenReturn("state-1"); - ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = + new ClientStatsAggregator( Collections.emptySet(), features, healthMetrics, @@ -203,7 +201,7 @@ private static CoreSpan metricsEligibleSpan() { when(span.getHttpStatusCode()).thenReturn((short) 200); when(span.getParentId()).thenReturn(0L); when(span.getOrigin()).thenReturn(null); - when(span.unsafeGetTag(eq(Tags.SPAN_KIND), any(CharSequence.class))).thenReturn("client"); + when(span.getSpanKindString()).thenReturn("client"); return span; } @@ -228,7 +226,7 @@ private static CoreSpan markerSpan() { when(span.getHttpStatusCode()).thenReturn((short) 200); when(span.getParentId()).thenReturn(0L); when(span.getOrigin()).thenReturn(null); - when(span.unsafeGetTag(eq(Tags.SPAN_KIND), any(CharSequence.class))).thenReturn("client"); + when(span.getSpanKindString()).thenReturn("client"); return span; } } diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorInboxFullTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorInboxFullTest.java similarity index 87% rename from dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorInboxFullTest.java rename to dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorInboxFullTest.java index f4e4c2da253..3683bef7f42 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorInboxFullTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ClientStatsAggregatorInboxFullTest.java @@ -2,14 +2,12 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import datadog.communication.ddagent.DDAgentFeaturesDiscovery; -import datadog.trace.bootstrap.instrumentation.api.Tags; import datadog.trace.core.CoreSpan; import datadog.trace.core.SpanKindFilter; import datadog.trace.core.monitor.HealthMetrics; @@ -17,12 +15,12 @@ import org.junit.jupiter.api.Test; /** - * Coverage for the inbox-full fast-path in {@code ConflatingMetricsAggregator.publish}: when the + * Coverage for the inbox-full fast-path in {@code ClientStatsAggregator.publish}: when the * producer-side inbox is at capacity, the next {@code publish} call short-circuits before any tag * extraction or {@code SpanSnapshot} allocation and reports {@code onStatsInboxFull()} to health * metrics. */ -class ConflatingMetricsAggregatorInboxFullTest { +class ClientStatsAggregatorInboxFullTest { @Test void publishFiresOnStatsInboxFullOnceInboxIsAtCapacity() { @@ -38,8 +36,8 @@ void publishFiresOnStatsInboxFullOnceInboxIsAtCapacity() { // never drains -- snapshots accumulate in the inbox until capacity, then the next publish hits // the size-vs-capacity fast path. int queueSize = 8; - ConflatingMetricsAggregator aggregator = - new ConflatingMetricsAggregator( + ClientStatsAggregator aggregator = + new ClientStatsAggregator( Collections.emptySet(), features, healthMetrics, @@ -78,7 +76,7 @@ private static CoreSpan metricsEligibleSpan() { when(span.getHttpStatusCode()).thenReturn((short) 200); when(span.getParentId()).thenReturn(0L); when(span.getOrigin()).thenReturn(null); - when(span.unsafeGetTag(eq(Tags.SPAN_KIND), any(CharSequence.class))).thenReturn("client"); + when(span.getSpanKindString()).thenReturn("client"); return span; } } diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/PeerTagSchemaTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/PeerTagSchemaTest.java index 7d818a2686b..3c0701b7538 100644 --- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/PeerTagSchemaTest.java +++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/PeerTagSchemaTest.java @@ -3,7 +3,6 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; @@ -13,20 +12,15 @@ import java.util.Set; import org.junit.jupiter.api.Test; -/** - * Unit tests for {@link PeerTagSchema}. Covers the {@link PeerTagSchema#hasSameTagsAs(Set)} - * predicate that drives the aggregator's reconcile fast/slow path split, the factory shapes, and - * the {@link PeerTagSchema#INTERNAL} singleton. - */ class PeerTagSchemaTest { @Test void ofBuildsSchemaFromSetWithState() { Set tags = new LinkedHashSet<>(Arrays.asList("peer.hostname", "peer.service")); - PeerTagSchema schema = PeerTagSchema.of(tags, "abc123"); + PeerTagSchema schema = PeerTagSchema.of(tags, "state-1234"); assertArrayEquals(new String[] {"peer.hostname", "peer.service"}, schema.names); - assertEquals("abc123", schema.state); + assertEquals("state-1234", schema.state); assertEquals(2, schema.size()); } @@ -36,7 +30,6 @@ void ofHandlesEmptySet() { assertEquals(0, schema.size()); assertEquals(0, schema.names.length); - assertNull(schema.state); } @Test diff --git a/dd-trace-core/src/traceAgentTest/groovy/datadog/trace/common/metrics/MetricsIntegrationTest.groovy b/dd-trace-core/src/traceAgentTest/groovy/datadog/trace/common/metrics/MetricsIntegrationTest.groovy deleted file mode 100644 index 401aaef4b7e..00000000000 --- a/dd-trace-core/src/traceAgentTest/groovy/datadog/trace/common/metrics/MetricsIntegrationTest.groovy +++ /dev/null @@ -1,75 +0,0 @@ -package datadog.trace.common.metrics - -import static datadog.communication.ddagent.DDAgentFeaturesDiscovery.V06_METRICS_ENDPOINT -import static datadog.trace.common.metrics.EventListener.EventType.OK -import static java.util.concurrent.TimeUnit.SECONDS - -import datadog.communication.http.OkHttpUtils -import datadog.metrics.api.Histograms -import datadog.metrics.impl.DDSketchHistograms -import datadog.trace.api.Config -import datadog.trace.api.WellKnownTags -import java.util.concurrent.CopyOnWriteArrayList -import java.util.concurrent.CountDownLatch -import okhttp3.HttpUrl - -class MetricsIntegrationTest extends AbstractTraceAgentTest { - - def setupSpec() { - // Initialize metrics-lib histograms to register the DDSketch implementation - Histograms.register(DDSketchHistograms.FACTORY) - } - - def "send metrics to trace agent should notify with OK event"() { - setup: - def latch = new CountDownLatch(1) - def listener = new BlockingListener(latch) - def agentUrl = Config.get().getAgentUrl() - OkHttpSink sink = new OkHttpSink(OkHttpUtils.buildHttpClient(HttpUrl.parse(agentUrl), 5000L), agentUrl, V06_METRICS_ENDPOINT, true, false, [:]) - sink.register(listener) - - when: - SerializingMetricWriter writer = new SerializingMetricWriter( - new WellKnownTags("runtimeid","hostname", "env", "service", "version","language"), - sink - ) - writer.startBucket(2, System.nanoTime(), SECONDS.toNanos(10)) - // Build entries via the production AggregateEntry.forSnapshot(snap, keyHash) path -- same - // construction as AggregateTable.findOrInsert. Both entries use one peer tag (grault:quux) - // -> schema names=["grault"], values=["quux"]. - PeerTagSchema schema = PeerTagSchema.testSchema(["grault"] as String[]) - SpanSnapshot snap1 = new SpanSnapshot( - "resource1", "service1", "operation1", null, "sql", (short) 0, - false, true, "xyzzy", schema, ["quux"] as String[], null, null, null, 0L) - def entry1 = new AggregateEntry(snap1, AggregateEntry.hashOf(snap1)) - [2, 1, 2, 250, 4].each { entry1.recordOneDuration(it as long) } - writer.add(entry1) - SpanSnapshot snap2 = new SpanSnapshot( - "resource2", "service2", "operation2", null, "web", (short) 200, - false, true, "xyzzy", schema, ["quux"] as String[], null, null, null, 0L) - def entry2 = new AggregateEntry(snap2, AggregateEntry.hashOf(snap2)) - [1, 1, 200, 2, 3, 4, 5, 6, 7, 8].each { entry2.recordOneDuration(it as long) } - writer.add(entry2) - writer.finishBucket() - - then: - latch.await(5, SECONDS) - listener.events.size() == 1 && listener.events[0] == OK - } - - static class BlockingListener implements EventListener { - - List events = new CopyOnWriteArrayList<>() - final CountDownLatch latch - - BlockingListener(CountDownLatch latch) { - this.latch = latch - } - - @Override - void onEvent(EventType eventType, String message) { - events.add(eventType) - latch.countDown() - } - } -} diff --git a/dd-trace-core/src/traceAgentTest/java/datadog/trace/common/metrics/MetricsIntegrationTest.java b/dd-trace-core/src/traceAgentTest/java/datadog/trace/common/metrics/MetricsIntegrationTest.java new file mode 100644 index 00000000000..172eddf0bd5 --- /dev/null +++ b/dd-trace-core/src/traceAgentTest/java/datadog/trace/common/metrics/MetricsIntegrationTest.java @@ -0,0 +1,186 @@ +package datadog.trace.common.metrics; + +import static datadog.communication.ddagent.DDAgentFeaturesDiscovery.V06_METRICS_ENDPOINT; +import static datadog.trace.junit.utils.config.WithConfigExtension.injectSysConfig; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import datadog.communication.http.OkHttpUtils; +import datadog.metrics.agent.AgentMeter; +import datadog.metrics.api.statsd.StatsDClient; +import datadog.metrics.impl.DDSketchHistograms; +import datadog.metrics.impl.MonitoringImpl; +import datadog.trace.api.Config; +import datadog.trace.api.ConfigDefaults; +import datadog.trace.api.WellKnownTags; +import datadog.trace.api.config.TracerConfig; +import datadog.trace.junit.utils.config.WithConfigExtension; +import java.time.Duration; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import okhttp3.HttpUrl; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.startupcheck.MinimumDurationRunningStartupCheckStrategy; + +@ExtendWith(WithConfigExtension.class) +class MetricsIntegrationTest { + + // CI runs an agent container alongside the build (reached via CI_AGENT_HOST); when building + // locally we start one ourselves with testcontainers. + private static final boolean RUNNING_IN_CI = "true".equals(System.getenv("CI")); + private static GenericContainer agentContainer; + + @BeforeAll + static void setupSpec() { + // recordOneDuration -> Histogram.accept needs the metrics meter / histogram factory registered. + MonitoringImpl monitoring = new MonitoringImpl(StatsDClient.NO_OP, 1, TimeUnit.SECONDS); + AgentMeter.registerIfAbsent(StatsDClient.NO_OP, monitoring, DDSketchHistograms.FACTORY); + + if (!RUNNING_IN_CI) { + Map env = new HashMap<>(); + env.put("DD_APM_ENABLED", "true"); + env.put("DD_BIND_HOST", "0.0.0.0"); + env.put("DD_API_KEY", "invalid_key_but_this_is_fine"); + env.put("DD_HOSTNAME", "doesnotexist"); + env.put("DD_LOGS_STDOUT", "yes"); + agentContainer = + new GenericContainer<>("datadog/agent:7.40.1") + .withEnv(env) + .withExposedPorts(ConfigDefaults.DEFAULT_TRACE_AGENT_PORT) + .withStartupTimeout(Duration.ofSeconds(120)) + // Sleep for a bit so the agent's rate_by_service response is populated -- mirrors the + // race-condition workaround from the original Spock base. + .withStartupCheckStrategy( + new MinimumDurationRunningStartupCheckStrategy(Duration.ofSeconds(10))); + agentContainer.start(); + } + } + + @AfterAll + static void cleanupSpec() { + if (agentContainer != null) { + agentContainer.stop(); + } + } + + @BeforeEach + void setup() { + injectSysConfig(TracerConfig.AGENT_HOST, agentHost()); + injectSysConfig(TracerConfig.TRACE_AGENT_PORT, agentPort()); + } + + private static String agentHost() { + return agentContainer != null ? agentContainer.getHost() : System.getenv("CI_AGENT_HOST"); + } + + private static String agentPort() { + return agentContainer != null + ? String.valueOf(agentContainer.getMappedPort(ConfigDefaults.DEFAULT_TRACE_AGENT_PORT)) + : String.valueOf(ConfigDefaults.DEFAULT_TRACE_AGENT_PORT); + } + + @Test + void sendMetricsToTraceAgentShouldNotifyWithOkEvent() throws InterruptedException { + // setup + CountDownLatch latch = new CountDownLatch(1); + BlockingListener listener = new BlockingListener(latch); + String agentUrl = Config.get().getAgentUrl(); + OkHttpSink sink = + new OkHttpSink( + OkHttpUtils.buildHttpClient(HttpUrl.parse(agentUrl), 5000L), + agentUrl, + V06_METRICS_ENDPOINT, + true, + false, + Collections.emptyMap()); + sink.register(listener); + + // when + SerializingMetricWriter writer = + new SerializingMetricWriter( + new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language"), + sink); + writer.startBucket(2, System.nanoTime(), SECONDS.toNanos(10)); + // Build entries through the production AggregateTable.findOrInsert path (canonicalizes the + // snapshot and creates/looks up the entry). Both entries use one peer tag (grault:quux) and no + // additional tags -> schema names=["grault"], values=["quux"]. + AggregateTable table = new AggregateTable(8); + PeerTagSchema schema = PeerTagSchema.testSchema(new String[] {"grault"}); + SpanSnapshot snap1 = + new SpanSnapshot( + "resource1", + "service1", + "operation1", + null, + "sql", + (short) 0, + false, + true, + "xyzzy", + schema, + new String[] {"quux"}, + null, + null, + null, + 0L); + AggregateEntry entry1 = table.findOrInsert(snap1); + for (long duration : new long[] {2, 1, 2, 250, 4}) { + entry1.recordOneDuration(duration); + } + writer.add(entry1); + SpanSnapshot snap2 = + new SpanSnapshot( + "resource2", + "service2", + "operation2", + null, + "web", + (short) 200, + false, + true, + "xyzzy", + schema, + new String[] {"quux"}, + null, + null, + null, + 0L); + AggregateEntry entry2 = table.findOrInsert(snap2); + for (long duration : new long[] {1, 1, 200, 2, 3, 4, 5, 6, 7, 8}) { + entry2.recordOneDuration(duration); + } + writer.add(entry2); + writer.finishBucket(); + + // then + assertTrue(latch.await(5, SECONDS)); + assertEquals(1, listener.events.size()); + assertEquals(EventListener.EventType.OK, listener.events.get(0)); + } + + static class BlockingListener implements EventListener { + final List events = new CopyOnWriteArrayList<>(); + final CountDownLatch latch; + + BlockingListener(CountDownLatch latch) { + this.latch = latch; + } + + @Override + public void onEvent(EventListener.EventType eventType, String message) { + events.add(eventType); + latch.countDown(); + } + } +} diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index 6418bab301e..83512a8504e 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -1018,7 +1018,6 @@ public static String getHostName() { private final boolean tracerMetricsBufferingEnabled; private final int tracerMetricsMaxAggregates; private final int tracerMetricsMaxPending; - private final boolean reportHostName; private final boolean traceAnalyticsEnabled; @@ -2245,7 +2244,6 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins (long) configProvider.getInteger(TRACER_METRICS_MAX_PENDING, defaultMaxPending) * LEGACY_BATCH_SIZE; tracerMetricsMaxPending = (int) Math.min(requestedMaxPending, MAX_SAFE_ARRAY_SIZE); - reportHostName = configProvider.getBoolean(TRACE_REPORT_HOSTNAME, DEFAULT_TRACE_REPORT_HOSTNAME);