diff --git a/dd-trace-core/src/jmh/java/datadog/trace/util/concurrent/RingVsQueueBenchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/util/concurrent/RingVsQueueBenchmark.java
new file mode 100644
index 00000000000..6a1e3d7c802
--- /dev/null
+++ b/dd-trace-core/src/jmh/java/datadog/trace/util/concurrent/RingVsQueueBenchmark.java
@@ -0,0 +1,191 @@
+package datadog.trace.util.concurrent;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+
+import java.util.function.BiConsumer;
+import org.jctools.queues.MpscArrayQueue;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Group;
+import org.openjdk.jmh.annotations.GroupThreads;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Head-to-head comparison of {@link MpscRingBuffer} (mutable pre-allocated slots) against the
+ * conventional approach of a jctools {@link MpscArrayQueue} with a fresh {@code Slot} allocated per
+ * publish. The latter is the pattern the current CSS code uses for {@code SpanSnapshot} on the
+ * producer side, so the delta between the two measures the actual allocation/handoff saving of the
+ * ring-buffer rewrite.
+ *
+ * <ul>
+ *   <li>{@code write_*_8p} — 8 producers, background drainer keeps the structure empty so the
+ *       measurement reflects publish cost, not full-structure drop cost. Pair-compare ring vs queue
+ *       at matched capacity.
+ *   <li>{@code e2e_*_8p} — JMH {@code @Group} pairing 8 producers with 1 consumer for each
+ *       structure. End-to-end ops/s under realistic backpressure.
+ * </ul>
+ *
+ * <p>Run with {@code -prof gc} to also see per-op allocation rate — that's where the ring's win is
+ * loudest, since the queue allocates one {@code Slot} per publish and the ring allocates none.
+ */
+@State(Scope.Benchmark)
+@Warmup(iterations = 2, time = 15, timeUnit = SECONDS)
+@Measurement(iterations = 5, time = 15, timeUnit = SECONDS)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(SECONDS)
+@Fork(value = 1)
+public class RingVsQueueBenchmark {
+
+  /**
+   * Shared slot type. {@code MpscRingBuffer} pre-allocates these and the producer mutates in place;
+   * the queue path allocates a fresh one per publish and offers the reference. Two constructors so
+   * both styles read naturally.
+   */
+  public static final class Slot {
+    long value;
+
+    Slot() {}
+
+    Slot(final long value) {
+      this.value = value;
+    }
+  }
+
+  // Static (non-capturing) handlers. Passing ts/bh as context lets the JIT keep these as
+  // singleton functions and avoid per-call lambda allocation.
+  private static final BiConsumer<ThreadState, Slot> RING_FILLER =
+      (ts, slot) -> {
+        slot.value = ts.counter;
+        ts.counter++;
+      };
+
+  private static final BiConsumer<Blackhole, Slot> RING_CONSUMER =
+      (bh, slot) -> bh.consume(slot.value);
+
+  @Param({"1024"})
+  public int capacity;
+
+  /** Write-side benchmark structures. Drained by a background thread so they never fill. */
+  MpscRingBuffer<Slot> ring;
+
+  MpscArrayQueue<Slot> queue;
+
+  /** E2e benchmark structures. JMH drives both sides via {@code @Group}; no background drainer. */
+  MpscRingBuffer<Slot> e2eRing;
+
+  MpscArrayQueue<Slot> e2eQueue;
+
+  private volatile boolean stopDrainers;
+  private Thread ringDrainer;
+  private Thread queueDrainer;
+
+  @Setup(Level.Trial)
+  public void setup() {
+    ring = new MpscRingBuffer<>(Slot::new, capacity);
+    queue = new MpscArrayQueue<>(capacity);
+    e2eRing = new MpscRingBuffer<>(Slot::new, capacity);
+    e2eQueue = new MpscArrayQueue<>(capacity);
+
+    stopDrainers = false;
+    ringDrainer =
+        new Thread(
+            () -> {
+              while (!stopDrainers) {
+                if (ring.drain((Slot s) -> {}) == 0) Thread.yield();
+              }
+            },
+            "RingVsQueueBenchmark-ringDrainer");
+    ringDrainer.setDaemon(true);
+    ringDrainer.start();
+
+    queueDrainer =
+        new Thread(
+            () -> {
+              while (!stopDrainers) {
+                Slot s = queue.poll();
+                if (s == null) Thread.yield();
+              }
+            },
+            "RingVsQueueBenchmark-queueDrainer");
+    queueDrainer.setDaemon(true);
+    queueDrainer.start();
+  }
+
+  @TearDown(Level.Trial)
+  public void teardown() throws InterruptedException {
+    stopDrainers = true;
+    ringDrainer.join(5_000);
+    queueDrainer.join(5_000);
+  }
+
+  @State(Scope.Thread)
+  public static class ThreadState {
+    long counter;
+  }
+
+  // ============ Write-side throughput ============
+
+  @Threads(8)
+  @Benchmark
+  public boolean write_ring_8p(final ThreadState ts) {
+    return ring.tryWrite(ts, RING_FILLER);
+  }
+
+  /** Mirrors the SpanSnapshot pattern: allocate a fresh instance per publish, offer it. */
+  @Threads(8)
+  @Benchmark
+  public boolean write_queue_8p(final ThreadState ts) {
+    return queue.offer(new Slot(ts.counter++));
+  }
+
+  // ============ End-to-end producer/consumer ============
+
+  @Group("e2e_ring_8p")
+  @GroupThreads(8)
+  @Benchmark
+  public boolean e2e_ring_producer(final ThreadState ts) {
+    return e2eRing.tryWrite(ts, RING_FILLER);
+  }
+
+  @Group("e2e_ring_8p")
+  @GroupThreads(1)
+  @Benchmark
+  public int e2e_ring_consumer(final Blackhole bh) {
+    int drained = e2eRing.drain(bh, RING_CONSUMER);
+    if (drained == 0) Thread.yield();
+    return drained;
+  }
+
+  @Group("e2e_queue_8p")
+  @GroupThreads(8)
+  @Benchmark
+  public boolean e2e_queue_producer(final ThreadState ts) {
+    return e2eQueue.offer(new Slot(ts.counter++));
+  }
+
+  @Group("e2e_queue_8p")
+  @GroupThreads(1)
+  @Benchmark
+  public int e2e_queue_consumer(final Blackhole bh) {
+    int drained = 0;
+    Slot slot;
+    while ((slot = e2eQueue.poll()) != null) {
+      bh.consume(slot.value);
+      drained++;
+    }
+    if (drained == 0) Thread.yield();
+    return drained;
+  }
+}
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java
new file mode 100644
index 00000000000..a518090f93b
--- /dev/null
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java
@@ -0,0 +1,440 @@
+package datadog.trace.common.metrics;
+
+import static datadog.trace.bootstrap.instrumentation.api.UTF8BytesString.EMPTY;
+
+import datadog.metrics.api.Histogram;
+import datadog.trace.api.Pair;
+import datadog.trace.api.cache.DDCache;
+import datadog.trace.api.cache.DDCaches;
+import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
+import datadog.trace.util.Hashtable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.function.Function;
+import javax.annotation.Nullable;
+
+/**
+ * Hashtable entry for the consumer-side aggregator. Holds the UTF8-encoded label fields that {@link
+ * SerializingMetricWriter} writes to the wire plus the mutable counter/histogram state for the key.
+ *
+ * <p>{@link #matches(SpanSnapshot)} compares the entry's stored UTF8 forms against the snapshot's
+ * raw {@code CharSequence}/{@code String}/{@code String[]} fields via content-equality, so {@code
+ * String} vs {@code UTF8BytesString} mixing on the same logical key collapses into one entry
+ * instead of splitting.
+ *
+ * <p>The static UTF8 caches that used to live on {@code MetricKey} and {@code
+ * ConflatingMetricsAggregator} are consolidated here.
+ *
+ * <p><b>Deliberate cohesion.</b> This class concentrates five responsibilities -- the static UTF8
+ * caches, the canonicalized label fields, the raw {@code peerTagNames}/{@code peerTagValues} arrays
+ * used by {@link #matches}, the pre-encoded {@code peerTags} list used by the serializer, and the
+ * mutable counter/histogram aggregate state -- on a single object. The prior design split the label
+ * fields and aggregate state across separate {@code MetricKey} and {@code AggregateMetric}
+ * instances, allocating both per unique key on miss; folding them yields one allocation per unique
+ * key. The class is wider than its predecessors as a result, but that's the trade we explicitly
+ * chose.
+ *
+ * <p><b>Required vs optional field absence.</b> Required label fields ({@code resource}, {@code
+ * service}, {@code operationName}, {@code type}, {@code spanKind}) canonicalize a {@code null}
+ * snapshot value into {@link UTF8BytesString#EMPTY} via {@link #canonicalize} -- they are never
+ * {@code null} on a constructed entry. Optional label fields ({@code serviceSource}, {@code
+ * httpMethod}, {@code httpEndpoint}, {@code grpcStatusCode}) stay {@code null} on the entry when
+ * the snapshot value was {@code null}; the serializer uses {@code != null} to decide whether to
+ * emit them on the wire. {@link #contentEquals} treats {@code null} and length-0 as equivalent so
+ * {@link #matches} works against either form.
+ *
+ * <p><b>Not thread-safe.</b> Counter and histogram updates are performed by the single aggregator
+ * thread; producer threads tag durations via {@link #ERROR_TAG} / {@link #TOP_LEVEL_TAG} bits and
+ * hand them off through the snapshot inbox.
+ *
+ * <p><b>Single-writer invariant relies on convention.</b> The aggregator thread is the only mutator
+ * of this class and of {@link AggregateTable}. Nothing enforces this at runtime -- a stray mutation
+ * from a different thread (e.g. an HTTP-client callback) would corrupt counters or hashtable chains
+ * silently. The {@code ClearSignal} routing in {@link Aggregator} is the explicit mechanism for
+ * funneling cross-thread requests (e.g. {@code disable()}) back onto the aggregator thread; any new
+ * entry point that mutates aggregate state must do the same.
+ */
+final class AggregateEntry extends Hashtable.Entry {
+
+  static final long ERROR_TAG = 0x8000000000000000L;
+  static final long TOP_LEVEL_TAG = 0x4000000000000000L;
+
+  // UTF8 caches. Package-private so ConflatingMetricsAggregator's fillSlot can canonicalize
+  // producer-side via the same caches the entry constructor used to consult.
+  static final DDCache<String, UTF8BytesString> RESOURCE_CACHE = DDCaches.newFixedSizeCache(32);
+  static final DDCache<String, UTF8BytesString> SERVICE_CACHE = DDCaches.newFixedSizeCache(32);
+  static final DDCache<String, UTF8BytesString> OPERATION_CACHE = DDCaches.newFixedSizeCache(64);
+  static final DDCache<String, UTF8BytesString> SERVICE_SOURCE_CACHE =
+      DDCaches.newFixedSizeCache(16);
+  static final DDCache<String, UTF8BytesString> TYPE_CACHE = DDCaches.newFixedSizeCache(8);
+  static final DDCache<String, UTF8BytesString> SPAN_KIND_CACHE = DDCaches.newFixedSizeCache(16);
+  static final DDCache<String, UTF8BytesString> HTTP_METHOD_CACHE = DDCaches.newFixedSizeCache(8);
+  static final DDCache<String, UTF8BytesString> HTTP_ENDPOINT_CACHE =
+      DDCaches.newFixedSizeCache(32);
+  static final DDCache<String, UTF8BytesString> GRPC_STATUS_CODE_CACHE =
+      DDCaches.newFixedSizeCache(32);
+
+  /**
+   * Outer cache keyed by peer-tag name, with an inner per-name cache keyed by value. The inner
+   * cache produces the "name:value" encoded form the serializer writes.
+   */
+  private static final DDCache<
+          String, Pair<DDCache<String, UTF8BytesString>, Function<String, UTF8BytesString>>>
+      PEER_TAGS_CACHE = DDCaches.newFixedSizeCache(64);
+
+  private static final Function<
+          String, Pair<DDCache<String, UTF8BytesString>, Function<String, UTF8BytesString>>>
+      PEER_TAGS_CACHE_ADDER =
+          key ->
+              Pair.of(
+                  DDCaches.newFixedSizeCache(512),
+                  value -> UTF8BytesString.create(key + ":" + value));
+
+  private final UTF8BytesString resource;
+  private final UTF8BytesString service;
+  private final UTF8BytesString operationName;
+  @Nullable private final UTF8BytesString serviceSource;
+  private final UTF8BytesString type;
+  private final UTF8BytesString spanKind;
+  @Nullable private final UTF8BytesString httpMethod;
+  @Nullable private final UTF8BytesString httpEndpoint;
+  @Nullable private final UTF8BytesString grpcStatusCode;
+  private final short httpStatusCode;
+
+  /** Whether the root span carried the {@code synthetics} origin tag (synthetic-monitoring run). */
+  private final boolean synthetic;
+
+  /** Whether this span is the trace root ({@code parentId == 0}). */
+  private final boolean traceRoot;
+
+  // Peer tags carried in two forms: parallel String[] arrays mirroring the snapshot's (schema +
+  // values) shape for matches(), and pre-encoded List<UTF8BytesString> ("name:value") for the
+  // serializer. peerTagNames is the schema's names array (shared by-reference when the schema
+  // hasn't been replaced); peerTagValues is the per-span String[] parallel to it.
+  //
+  // Package-private so the in-package test helper (AggregateEntryTestUtils) can compare entries
+  // by raw layout; production access comes from this class's own matches() + constructor.
+  @Nullable final String[] peerTagNames;
+  @Nullable final String[] peerTagValues;
+  private final List<UTF8BytesString> peerTags;
+
+  // Mutable aggregate state -- single-thread (consumer/aggregator) writer.
+  private final Histogram okLatencies = Histogram.newHistogram();
+
+  /**
+   * Lazily allocated on the first recorded error. Most entries never see an error and keep this
+   * null for life; {@link SerializingMetricWriter} writes a cached empty-histogram form when null
+   * to keep the wire payload identical. Once allocated, it survives {@link #clear()} (cleared, not
+   * nulled) since an entry that errored once tends to error again.
+   */
+  @Nullable private Histogram errorLatencies;
+
+  private int errorCount;
+  private int hitCount;
+  private int topLevelCount;
+  private long duration;
+
+  /** Hot-path constructor for the producer/consumer flow. Slot fields are already canonical. */
+  AggregateEntry(SpanSnapshot s) {
+    super(s.keyHash);
+    // The slot's string fields are already canonical UTF8BytesString instances (canonicalized by
+    // the producer in fillSlot via the same caches we used to consult here). Just copy refs;
+    // matches() can do identity comparison instead of contentEquals.
+    this.resource = s.resourceName;
+    this.service = s.serviceName;
+    this.operationName = s.operationName;
+    this.serviceSource = s.serviceNameSource;
+    this.type = s.spanType;
+    this.spanKind = s.spanKind;
+    this.httpMethod = s.httpMethod;
+    this.httpEndpoint = s.httpEndpoint;
+    this.grpcStatusCode = s.grpcStatusCode;
+    this.httpStatusCode = s.httpStatusCode;
+    this.synthetic = s.synthetic;
+    this.traceRoot = s.traceRoot;
+    this.peerTagNames = s.peerTagSchema == null ? null : s.peerTagSchema.names;
+    // The slot's peerTagValues is a reusable scratch buffer owned by the ring -- the producer
+    // overwrites it on every claim of this slot. Snapshot it here by value so this entry's
+    // identity doesn't drift when the slot is reclaimed.
+    this.peerTagValues =
+        s.peerTagSchema == null ? null : Arrays.copyOf(s.peerTagValues, s.peerTagValues.length);
+    this.peerTags = materializePeerTags(this.peerTagNames, this.peerTagValues);
+  }
+
+  /**
+   * Records a single hit. {@code tagAndDuration} carries the duration nanos with optional {@link
+   * #ERROR_TAG} / {@link #TOP_LEVEL_TAG} bits OR-ed in.
+   */
+  void recordOneDuration(long tagAndDuration) {
+    ++hitCount;
+    if ((tagAndDuration & TOP_LEVEL_TAG) == TOP_LEVEL_TAG) {
+      tagAndDuration ^= TOP_LEVEL_TAG;
+      ++topLevelCount;
+    }
+    if ((tagAndDuration & ERROR_TAG) == ERROR_TAG) {
+      tagAndDuration ^= ERROR_TAG;
+      errorLatenciesForWrite().accept(tagAndDuration);
+      ++errorCount;
+    } else {
+      okLatencies.accept(tagAndDuration);
+    }
+    duration += tagAndDuration;
+  }
+
+  int getErrorCount() {
+    return errorCount;
+  }
+
+  int getHitCount() {
+    return hitCount;
+  }
+
+  int getTopLevelCount() {
+    return topLevelCount;
+  }
+
+  long getDuration() {
+    return duration;
+  }
+
+  Histogram getOkLatencies() {
+    return okLatencies;
+  }
+
+  /**
+   * Returns the entry's error-latency histogram, or {@code null} if no error has been recorded.
+   * Callers serializing this should treat {@code null} as "emit a cached empty histogram"; see
+   * {@link SerializingMetricWriter}.
+   */
+  @Nullable
+  Histogram getErrorLatencies() {
+    return errorLatencies;
+  }
+
+  /** Lazy-allocates {@link #errorLatencies} on the first error. */
+  private Histogram errorLatenciesForWrite() {
+    Histogram h = errorLatencies;
+    if (h == null) {
+      h = Histogram.newHistogram();
+      errorLatencies = h;
+    }
+    return h;
+  }
+
+  /**
+   * Resets the per-cycle counters and histograms. Label fields ({@code resource}, {@code service},
+   * ..., {@code peerTagNames}, {@code peerTagValues}) are deliberately left intact -- they're the
+   * entry's bucket identity and must persist so a subsequent snapshot with the same key reuses this
+   * entry instead of allocating a fresh one. Entries that stay at {@code hitCount == 0} across a
+   * cycle are reaped by {@link AggregateTable#expungeStaleAggregates}.
+   */
+  void clear() {
+    this.errorCount = 0;
+    this.hitCount = 0;
+    this.topLevelCount = 0;
+    this.duration = 0;
+    this.okLatencies.clear();
+    // errorLatencies stays null on entries that never errored. Only clear if it was allocated.
+    if (this.errorLatencies != null) {
+      this.errorLatencies.clear();
+    }
+  }
+
+  boolean matches(SpanSnapshot s) {
+    // All string fields are canonical UTF8BytesString references from the per-field DDCaches.
+    // Same content => same canonical instance => identity comparison suffices.
+    // peerTagSchema is also shared by reference (producer reads cachedPeerTagSchema), so its
+    // names array reference is stable too. peerTagValues stays content-compared since values
+    // vary per publish and aren't canonicalized.
+    return httpStatusCode == s.httpStatusCode
+        && synthetic == s.synthetic
+        && traceRoot == s.traceRoot
+        && resource == s.resourceName
+        && service == s.serviceName
+        && operationName == s.operationName
+        && serviceSource == s.serviceNameSource
+        && type == s.spanType
+        && spanKind == s.spanKind
+        // PeerTagSchema instances aren't guaranteed to be shared by reference across all
+        // producers (cachedPeerTagSchema can be swapped during reconcile; tests build fresh
+        // instances), so compare names by content. The same-reference case short-circuits to
+        // O(1) via Arrays.equals's a==b fast path.
+        && Arrays.equals(peerTagNames, s.peerTagSchema == null ? null : s.peerTagSchema.names)
+        && (peerTagNames == null || Arrays.equals(peerTagValues, s.peerTagValues))
+        && httpMethod == s.httpMethod
+        && httpEndpoint == s.httpEndpoint
+        && grpcStatusCode == s.grpcStatusCode;
+  }
+
+  /**
+   * Pre-checks {@link #keyHash} against {@code keyHash} before delegating to {@link
+   * #matches(SpanSnapshot)}. The hash check is cheap and rules out most mismatches without touching
+   * the field-by-field comparison.
+   */
+  boolean matches(long keyHash, SpanSnapshot s) {
+    return this.keyHash == keyHash && matches(s);
+  }
+
+  // Accessors for SerializingMetricWriter.
+  UTF8BytesString getResource() {
+    return resource;
+  }
+
+  UTF8BytesString getService() {
+    return service;
+  }
+
+  UTF8BytesString getOperationName() {
+    return operationName;
+  }
+
+  @Nullable
+  UTF8BytesString getServiceSource() {
+    return serviceSource;
+  }
+
+  UTF8BytesString getType() {
+    return type;
+  }
+
+  UTF8BytesString getSpanKind() {
+    return spanKind;
+  }
+
+  @Nullable
+  UTF8BytesString getHttpMethod() {
+    return httpMethod;
+  }
+
+  @Nullable
+  UTF8BytesString getHttpEndpoint() {
+    return httpEndpoint;
+  }
+
+  @Nullable
+  UTF8BytesString getGrpcStatusCode() {
+    return grpcStatusCode;
+  }
+
+  int getHttpStatusCode() {
+    return httpStatusCode;
+  }
+
+  boolean isSynthetics() {
+    return synthetic;
+  }
+
+  boolean isTraceRoot() {
+    return traceRoot;
+  }
+
+  List<UTF8BytesString> getPeerTags() {
+    return peerTags;
+  }
+
+  // Production AggregateEntry intentionally has no equals/hashCode override -- AggregateTable
+  // bucketing uses keyHash + matches(SpanSnapshot) directly and never invokes Object.equals.
+  // For tests that need value-equality (Spock argument matchers), use AggregateEntryTestUtils in
+  // src/test, which provides equals/hashCode helpers without exposing the contract in production.
+
+  // ----- helpers -----
+
+  static UTF8BytesString canonicalize(
+      DDCache<String, UTF8BytesString> cache, CharSequence charSeq) {
+    // Treat null and length-zero as the same canonical EMPTY instance so the producer-side
+    // canonicalization produces identity-equal results for null/"" inputs. matches() now uses
+    // identity comparison, so the previous content-equals collapse needs to happen here.
+    if (charSeq == null || charSeq.length() == 0) {
+      return EMPTY;
+    }
+    if (charSeq instanceof UTF8BytesString) {
+      return (UTF8BytesString) charSeq;
+    }
+    return cache.computeIfAbsent(charSeq.toString(), UTF8BytesString::create);
+  }
+
+  /**
+   * Like {@link #canonicalize} but returns {@code null} for a {@code null} input (rather than
+   * {@link UTF8BytesString#EMPTY}). Used for the four optional fields so the serializer can
+   * distinguish "absent" via a {@code != null} check and elide the field on the wire.
+   *
+   * <p>The {@code instanceof UTF8BytesString} short-circuit is dead code for {@link
+   * SpanSnapshot#httpMethod}/{@code httpEndpoint}/{@code grpcStatusCode} (statically {@code
+   * String}) but live for {@link SpanSnapshot#serviceNameSource} ({@link CharSequence}); keeping a
+   * single helper keeps the constructor consistent.
+   */
+  @Nullable
+  static UTF8BytesString canonicalizeOptional(
+      DDCache<String, UTF8BytesString> cache, @Nullable CharSequence charSeq) {
+    // Treat null and length-zero the same: both map to null (i.e. "absent"). matches() uses
+    // identity comparison, so the prior content-equals collapse needs to happen here.
+    if (charSeq == null || charSeq.length() == 0) {
+      return null;
+    }
+    if (charSeq instanceof UTF8BytesString) {
+      return (UTF8BytesString) charSeq;
+    }
+    return cache.computeIfAbsent(charSeq.toString(), UTF8BytesString::create);
+  }
+
+  /**
+   * UTF8 vs raw CharSequence content-equality, no allocation in the common (String) case.
+   *
+   * <p>Treats {@code null} and empty (length 0) as equivalent on either side. This matches the
+   * canonicalization semantics: {@link #canonicalize} maps a {@code null} input to {@link
+   * UTF8BytesString#EMPTY}, so an entry built from a snapshot with a null field needs to match a
+   * subsequent snapshot whose field is still null. {@code intHash(null) == 0 == "".hashCode()}, so
+   * the hash already agrees with this view.
+   */
+  private static boolean contentEquals(UTF8BytesString a, CharSequence b) {
+    if (a == null || a.length() == 0) {
+      return b == null || b.length() == 0;
+    }
+    // UTF8BytesString.toString() returns the underlying String -- O(1), no allocation.
+    return b != null && a.toString().contentEquals(b);
+  }
+
+  /**
+   * Encodes the per-span peer-tag values into the {@code List<UTF8BytesString>} the serializer
+   * consumes. Reads name/value pairs at the same index from the schema's names and the snapshot's
+   * values; null value slots are skipped (the span didn't set that peer tag). Counts hits once for
+   * exact-size allocation and preserves the singletonList fast path for the common one-entry case
+   * (e.g. internal-kind base.service).
+   */
+  private static List<UTF8BytesString> materializePeerTags(
+      @Nullable String[] names, @Nullable String[] values) {
+    if (names == null || values == null) {
+      return Collections.emptyList();
+    }
+    int n = names.length;
+    int firstHit = -1;
+    int hitCount = 0;
+    for (int i = 0; i < n; i++) {
+      if (values[i] != null) {
+        if (hitCount == 0) firstHit = i;
+        hitCount++;
+      }
+    }
+    if (hitCount == 0) {
+      return Collections.emptyList();
+    }
+    if (hitCount == 1) {
+      return Collections.singletonList(encodePeerTag(names[firstHit], values[firstHit]));
+    }
+    List<UTF8BytesString> tags = new ArrayList<>(hitCount);
+    for (int i = firstHit; i < n; i++) {
+      if (values[i] != null) {
+        tags.add(encodePeerTag(names[i], values[i]));
+      }
+    }
+    return tags;
+  }
+
+  private static UTF8BytesString encodePeerTag(String name, String value) {
+    final Pair<DDCache<String, UTF8BytesString>, Function<String, UTF8BytesString>>
+        cacheAndCreator = PEER_TAGS_CACHE.computeIfAbsent(name, PEER_TAGS_CACHE_ADDER);
+    return cacheAndCreator.getLeft().computeIfAbsent(value, cacheAndCreator.getRight());
+  }
+}
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateMetric.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateMetric.java
deleted file mode 100644
index dba66a5ab9c..00000000000
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateMetric.java
+++ /dev/null
@@ -1,103 +0,0 @@
-package datadog.trace.common.metrics;
-
-import datadog.metrics.api.Histogram;
-import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
-import java.util.concurrent.atomic.AtomicLongArray;
-
-/** Not thread-safe. Accumulates counts and durations. */
-@SuppressFBWarnings(
-    value = {"AT_NONATOMIC_OPERATIONS_ON_SHARED_VARIABLE", "AT_STALE_THREAD_WRITE_OF_PRIMITIVE"},
-    justification = "Explicitly not thread-safe. Accumulates counts and durations.")
-public final class AggregateMetric {
-
-  static final long ERROR_TAG = 0x8000000000000000L;
-  static final long TOP_LEVEL_TAG = 0x4000000000000000L;
-
-  private final Histogram okLatencies;
-  private final Histogram errorLatencies;
-  private int errorCount;
-  private int hitCount;
-  private int topLevelCount;
-  private long duration;
-
-  public AggregateMetric() {
-    okLatencies = Histogram.newHistogram();
-    errorLatencies = Histogram.newHistogram();
-  }
-
-  public AggregateMetric recordDurations(int count, AtomicLongArray durations) {
-    this.hitCount += count;
-    for (int i = 0; i < count && i < durations.length(); ++i) {
-      long duration = durations.getAndSet(i, 0);
-      if ((duration & TOP_LEVEL_TAG) == TOP_LEVEL_TAG) {
-        duration ^= TOP_LEVEL_TAG;
-        ++topLevelCount;
-      }
-      if ((duration & ERROR_TAG) == ERROR_TAG) {
-        // then it's an error
-        duration ^= ERROR_TAG;
-        errorLatencies.accept(duration);
-        ++errorCount;
-      } else {
-        okLatencies.accept(duration);
-      }
-      this.duration += duration;
-    }
-    return this;
-  }
-
-  /**
-   * Records a single hit. {@code tagAndDuration} carries the duration nanos with optional {@link
-   * #ERROR_TAG} / {@link #TOP_LEVEL_TAG} bits OR-ed in.
-   */
-  public AggregateMetric recordOneDuration(long tagAndDuration) {
-    ++hitCount;
-    if ((tagAndDuration & TOP_LEVEL_TAG) == TOP_LEVEL_TAG) {
-      tagAndDuration ^= TOP_LEVEL_TAG;
-      ++topLevelCount;
-    }
-    if ((tagAndDuration & ERROR_TAG) == ERROR_TAG) {
-      tagAndDuration ^= ERROR_TAG;
-      errorLatencies.accept(tagAndDuration);
-      ++errorCount;
-    } else {
-      okLatencies.accept(tagAndDuration);
-    }
-    duration += tagAndDuration;
-    return this;
-  }
-
-  public int getErrorCount() {
-    return errorCount;
-  }
-
-  public int getHitCount() {
-    return hitCount;
-  }
-
-  public int getTopLevelCount() {
-    return topLevelCount;
-  }
-
-  public long getDuration() {
-    return duration;
-  }
-
-  public Histogram getOkLatencies() {
-    return okLatencies;
-  }
-
-  public Histogram getErrorLatencies() {
-    return errorLatencies;
-  }
-
-  @SuppressFBWarnings("AT_NONATOMIC_64BIT_PRIMITIVE")
-  public void clear() {
-    this.errorCount = 0;
-    this.hitCount = 0;
-    this.topLevelCount = 0;
-    this.duration = 0;
-    this.okLatencies.clear();
-    this.errorLatencies.clear();
-  }
-}
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java
new file mode 100644
index 00000000000..b23e492ce82
--- /dev/null
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java
@@ -0,0 +1,142 @@
+package datadog.trace.common.metrics;
+
+import datadog.trace.util.Hashtable;
+import datadog.trace.util.Hashtable.MutatingTableIterator;
+import datadog.trace.util.Hashtable.Support;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+
+/**
+ * Consumer-side {@link AggregateEntry} store, keyed on the raw fields of a {@link SpanSnapshot}.
+ *
+ * <p>Replaces the prior {@code LRUCache<MetricKey, AggregateMetric>}. The win is on the
+ * steady-state hit path: a snapshot lookup is a 64-bit hash compute + bucket walk + field-wise
+ * {@code matches}, with no per-snapshot {@link AggregateEntry} allocation and no UTF8 cache
+ * lookups. The UTF8-encoded forms (formerly held on {@code MetricKey}) and the mutable counters
+ * (formerly held on {@code AggregateMetric}) both live on the {@link AggregateEntry} now, built
+ * once per unique key at insert time.
+ *
+ * <p><b>Not thread-safe.</b> The aggregator thread is the sole writer of both this table and its
+ * contained {@link AggregateEntry} state. Any cross-thread request that needs to mutate -- e.g.
+ * {@link ConflatingMetricsAggregator#disable()} -- must funnel onto the aggregator thread via the
+ * inbox (see the {@code ClearSignal} routing in {@link Aggregator}). The invariant is convention-
+ * enforced; nothing here checks the calling thread at runtime, so a wrong-thread call would corrupt
+ * bucket chains silently.
+ */
+final class AggregateTable {
+
+  private final Hashtable.Entry[] buckets;
+  private final int maxAggregates;
+  private int size;
+
+  /**
+   * Bucket index where the last {@link #evictOneStale} successfully removed an entry. The next call
+   * resumes from this bucket so a fast-evicting workload doesn't repeatedly re-walk the same hot
+   * entries clustered near bucket 0. Reset to {@code 0} by {@link #clear}.
+   */
+  private int evictCursor;
+
+  AggregateTable(int maxAggregates) {
+    this.buckets = Support.create(maxAggregates, Support.MAX_RATIO);
+    this.maxAggregates = maxAggregates;
+  }
+
+  int size() {
+    return size;
+  }
+
+  boolean isEmpty() {
+    return size == 0;
+  }
+
+  /**
+   * Returns the {@link AggregateEntry} to update for {@code snapshot}, lazily creating one on miss.
+   * Returns {@code null} when the table is at capacity and no stale entry can be evicted -- the
+   * caller should drop the data point in that case.
+   */
+  AggregateEntry findOrInsert(SpanSnapshot snapshot) {
+    // keyHash is precomputed by the producer in SpanSnapshot.computeAndSetKeyHash, no
+    // re-hashing on the aggregator thread.
+    final long keyHash = snapshot.keyHash;
+    for (AggregateEntry candidate = Support.bucket(buckets, keyHash);
+        candidate != null;
+        candidate = candidate.next()) {
+      if (candidate.matches(keyHash, snapshot)) {
+        return candidate;
+      }
+    }
+    if (size >= maxAggregates && !evictOneStale()) {
+      return null;
+    }
+    AggregateEntry entry = new AggregateEntry(snapshot);
+    Support.insertHeadEntry(buckets, keyHash, entry);
+    size++;
+    return entry;
+  }
+
+  /**
+   * Unlinks the first entry whose {@code getHitCount() == 0}, resuming the scan from {@link
+   * #evictCursor} so back-to-back evictions amortize to O(1) per call. Worst case for a single call
+   * is still O(N) when nearly every entry is hot, but a sustained eviction stream never re-scans
+   * the hot prefix more than twice across N evictions.
+   *
+   * <p>The semantic intent: at cap with all entries live, drop the new key (reported via {@code
+   * onStatsAggregateDropped}) rather than evicting an established one. Cap is sized to the
+   * steady-state working set, so eviction is rare; this cursor optimization handles the
+   * pathological "persistently at cap" case.
+   */
+  private boolean evictOneStale() {
+    // Two passes -- [cursor, length) then [0, cursor) -- using the half-open-range iterator. The
+    // second pass is naturally empty when cursor==0, so no extra check needed.
+    return evictOneStaleInRange(evictCursor, buckets.length)
+        || evictOneStaleInRange(0, evictCursor);
+  }
+
+  /** Scans {@code [startBucket, endBucket)} for the first stale entry and unlinks it. */
+  private boolean evictOneStaleInRange(int startBucket, int endBucket) {
+    MutatingTableIterator<AggregateEntry> iter =
+        Support.mutatingTableIterator(buckets, startBucket, endBucket);
+    while (iter.hasNext()) {
+      AggregateEntry e = iter.next();
+      if (e.getHitCount() == 0) {
+        int bucket = iter.currentBucket();
+        iter.remove();
+        size--;
+        evictCursor = bucket;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  void forEach(Consumer<AggregateEntry> consumer) {
+    Support.forEach(buckets, consumer);
+  }
+
+  /**
+   * Context-passing forEach. Useful for callers that want to avoid a capturing-lambda allocation on
+   * each invocation -- pass a non-capturing {@link BiConsumer} (typically a {@code static final})
+   * plus whatever side-band state it needs as {@code context}.
+   */
+  <T> void forEach(T context, BiConsumer<T, AggregateEntry> consumer) {
+    Support.forEach(buckets, context, consumer);
+  }
+
+  /** Removes entries whose {@code getHitCount() == 0}. */
+  void expungeStaleAggregates() {
+    for (MutatingTableIterator<AggregateEntry> iter = Support.mutatingTableIterator(buckets);
+        iter.hasNext(); ) {
+      AggregateEntry e = iter.next();
+      if (e.getHitCount() == 0) {
+        iter.remove();
+        size--;
+      }
+    }
+  }
+
+  void clear() {
+    Support.clear(buckets);
+    size = 0;
+    evictCursor = 0;
+  }
+}
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java
index 9998c21ed0b..d70a1fbaae1 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java
@@ -1,26 +1,15 @@
 package datadog.trace.common.metrics;
 
-import static datadog.trace.api.Functions.UTF8_ENCODE;
-import static datadog.trace.common.metrics.ConflatingMetricsAggregator.PEER_TAGS_CACHE;
-import static datadog.trace.common.metrics.ConflatingMetricsAggregator.PEER_TAGS_CACHE_ADDER;
-import static datadog.trace.common.metrics.ConflatingMetricsAggregator.SERVICE_NAMES;
-import static datadog.trace.common.metrics.ConflatingMetricsAggregator.SPAN_KINDS;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 
-import datadog.trace.api.Pair;
-import datadog.trace.api.cache.DDCache;
-import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
+import datadog.trace.common.metrics.SignalItem.ClearSignal;
 import datadog.trace.common.metrics.SignalItem.StopSignal;
 import datadog.trace.core.monitor.HealthMetrics;
-import datadog.trace.core.util.LRUCache;
+import datadog.trace.util.concurrent.MpscRingBuffer;
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
 import java.util.concurrent.TimeUnit;
-import java.util.function.Function;
+import java.util.concurrent.locks.LockSupport;
+import java.util.function.BiConsumer;
 import org.jctools.queues.MessagePassingQueue;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -31,9 +20,11 @@ final class Aggregator implements Runnable {
 
   private static final Logger log = LoggerFactory.getLogger(Aggregator.class);
 
-  private final MessagePassingQueue<InboxItem> inbox;
-  private final LRUCache<MetricKey, AggregateMetric> aggregates;
+  private final MpscRingBuffer<SpanSnapshot> dataInbox;
+  private final MessagePassingQueue<SignalItem> signalInbox;
+  private final AggregateTable aggregates;
   private final MetricWriter writer;
+  private final HealthMetrics healthMetrics;
   // the reporting interval controls how much history will be buffered
   // when the agent is unresponsive (only 10 pending requests will be
   // buffered by OkHttpSink)
@@ -55,9 +46,36 @@ final class Aggregator implements Runnable {
       justification = "the field is confined to the agent thread running the Aggregator")
   private boolean dirty;
 
+  @SuppressFBWarnings(
+      value = "AT_STALE_THREAD_WRITE_OF_PRIMITIVE",
+      justification = "the field is confined to the agent thread running the Aggregator")
+  private boolean stopped;
+
+  /**
+   * True iff the aggregator thread is currently parked (or about to park) waiting for new work.
+   * Producers volatile-read this after publishing; on true they call {@link #unparkIfWaiting()} to
+   * wake the aggregator without paying the {@code sleepMillis} latency.
+   *
+   * <p>Volatile so the producer side sees the latest value. Set true before park, false on wake.
+   * The aggregator re-checks the inbox after setting this true (see {@link #run}) to close the race
+   * window: if a producer published just before the aggregator set the flag, the aggregator would
+   * have missed the unpark; the re-check catches that work and skips the park.
+   */
+  private volatile boolean waiting;
+
+  private volatile Thread aggregatorThread;
+
+  /**
+   * Static-singleton snapshot handler. Reads from the slot via {@code this} (passed as the context)
+   * and avoids per-drain lambda capture.
+   */
+  private static final BiConsumer<Aggregator, SpanSnapshot> SNAPSHOT_HANDLER =
+      Aggregator::handleSnapshot;
+
   Aggregator(
       MetricWriter writer,
-      MessagePassingQueue<InboxItem> inbox,
+      MpscRingBuffer<SpanSnapshot> dataInbox,
+      MessagePassingQueue<SignalItem> signalInbox,
       int maxAggregates,
       long reportingInterval,
       TimeUnit reportingIntervalTimeUnit,
@@ -65,7 +83,8 @@ final class Aggregator implements Runnable {
       Runnable onReportCycle) {
     this(
         writer,
-        inbox,
+        dataInbox,
+        signalInbox,
         maxAggregates,
         reportingInterval,
         reportingIntervalTimeUnit,
@@ -76,7 +95,8 @@ final class Aggregator implements Runnable {
 
   Aggregator(
       MetricWriter writer,
-      MessagePassingQueue<InboxItem> inbox,
+      MpscRingBuffer<SpanSnapshot> dataInbox,
+      MessagePassingQueue<SignalItem> signalInbox,
       int maxAggregates,
       long reportingInterval,
       TimeUnit reportingIntervalTimeUnit,
@@ -84,48 +104,45 @@ final class Aggregator implements Runnable {
       HealthMetrics healthMetrics,
       Runnable onReportCycle) {
     this.writer = writer;
-    this.inbox = inbox;
-    this.aggregates =
-        new LRUCache<>(
-            new AggregateExpiry(healthMetrics), maxAggregates * 4 / 3, 0.75f, maxAggregates);
+    this.dataInbox = dataInbox;
+    this.signalInbox = signalInbox;
+    this.aggregates = new AggregateTable(maxAggregates);
     this.reportingIntervalNanos = reportingIntervalTimeUnit.toNanos(reportingInterval);
     this.sleepMillis = sleepMillis;
+    this.healthMetrics = healthMetrics;
     this.onReportCycle = onReportCycle;
   }
 
-  private static final class AggregateExpiry
-      implements LRUCache.ExpiryListener<MetricKey, AggregateMetric> {
-    private final HealthMetrics healthMetrics;
-
-    AggregateExpiry(HealthMetrics healthMetrics) {
-      this.healthMetrics = healthMetrics;
-    }
-
-    @Override
-    public void accept(Map.Entry<MetricKey, AggregateMetric> expired) {
-      if (expired.getValue().getHitCount() > 0) {
-        healthMetrics.onStatsAggregateDropped();
-      }
-    }
-  }
-
-  public void clearAggregates() {
-    this.aggregates.clear();
-  }
-
   @Override
   public void run() {
-    Thread currentThread = Thread.currentThread();
-    Drainer drainer = new Drainer();
-    while (!currentThread.isInterrupted() && !drainer.stopped) {
+    final Thread currentThread = Thread.currentThread();
+    aggregatorThread = currentThread;
+    final long parkNanos = TimeUnit.MILLISECONDS.toNanos(sleepMillis);
+    while (!currentThread.isInterrupted() && !stopped) {
       try {
-        if (!inbox.isEmpty()) {
-          inbox.drain(drainer);
-        } else {
-          Thread.sleep(sleepMillis);
+        int drainedData = dataInbox.drain(this, SNAPSHOT_HANDLER);
+        // Signals are processed after the data drain. REPORT/STOP additionally drain any
+        // snapshots that arrived between the drain above and the signal's enqueue -- the
+        // signal-channel offer happens-before the poll here, so any publish that completed
+        // before report()/stop() is guaranteed visible to the inline drain in handleSignal.
+        SignalItem signal;
+        while (!stopped && (signal = signalInbox.poll()) != null) {
+          handleSignal(signal);
+        }
+        if (stopped) break;
+        if (drainedData == 0 && signalInbox.isEmpty()) {
+          waiting = true;
+          try {
+            // Re-check after publishing the waiting flag. A producer that published just before
+            // we set waiting=true won't have unparked us (they read waiting=false); the re-check
+            // catches that work via the volatile producerCursor.
+            if (dataInbox.isEmpty() && signalInbox.isEmpty()) {
+              LockSupport.parkNanos(this, parkNanos);
+            }
+          } finally {
+            waiting = false;
+          }
         }
-      } catch (InterruptedException e) {
-        currentThread.interrupt();
       } catch (Throwable error) {
         log.debug("error aggregating metrics", error);
       }
@@ -133,93 +150,70 @@ public void run() {
     log.debug("metrics aggregator exited");
   }
 
-  private final class Drainer implements MessagePassingQueue.Consumer<InboxItem> {
-
-    boolean stopped = false;
-
-    @Override
-    public void accept(InboxItem item) {
-      if (item instanceof SignalItem) {
-        SignalItem signal = (SignalItem) item;
-        if (!stopped) {
-          report(wallClockTime(), signal);
-          stopped = item instanceof StopSignal;
-          if (stopped) {
-            signal.complete();
-          }
-        } else {
-          signal.ignore();
-        }
-      } else if (item instanceof SpanSnapshot && !stopped) {
-        SpanSnapshot snapshot = (SpanSnapshot) item;
-        MetricKey key = buildMetricKey(snapshot);
-        AggregateMetric aggregate = aggregates.computeIfAbsent(key, k -> new AggregateMetric());
-        aggregate.recordOneDuration(snapshot.tagAndDuration);
-        dirty = true;
-      }
-    }
-  }
-
-  private static MetricKey buildMetricKey(SpanSnapshot s) {
-    return new MetricKey(
-        s.resourceName,
-        SERVICE_NAMES.computeIfAbsent(s.serviceName, UTF8_ENCODE),
-        s.operationName,
-        s.serviceNameSource,
-        s.spanType,
-        s.httpStatusCode,
-        s.synthetic,
-        s.traceRoot,
-        SPAN_KINDS.computeIfAbsent(s.spanKind, UTF8BytesString::create),
-        materializePeerTags(s.peerTagSchema, s.peerTagValues),
-        s.httpMethod,
-        s.httpEndpoint,
-        s.grpcStatusCode);
-  }
-
   /**
-   * Encodes the per-span peer-tag values into the {@code List<UTF8BytesString>} the {@link
-   * MetricKey} consumes. Reads name/value pairs at the same index from the schema's names and the
-   * snapshot's values; null value slots are skipped (the span didn't set that peer tag).
+   * Wake the aggregator if it's currently parked. Called from producer threads after publishing.
+   *
+   * <p>Cost on the producer hot path is one volatile read; only on a {@code true} read does the
+   * producer pay an {@code unpark} (cheap idempotent atomic). At saturating workloads where the
+   * aggregator is never parked, the volatile read is the only cost.
    */
-  private static List<UTF8BytesString> materializePeerTags(PeerTagSchema schema, String[] values) {
-    if (schema == null || values == null) {
-      return Collections.emptyList();
-    }
-    String[] names = schema.names;
-    int n = names.length;
-    // First pass: count how many tags fired and remember the first index. The single-entry case
-    // is common (e.g. INTERNAL spans only emit base.service) and gets a singletonList to avoid an
-    // ArrayList allocation on the hot path.
-    int firstHit = -1;
-    int hitCount = 0;
-    for (int i = 0; i < n; i++) {
-      if (values[i] != null) {
-        if (hitCount == 0) {
-          firstHit = i;
-        }
-        hitCount++;
+  void unparkIfWaiting() {
+    if (waiting) {
+      final Thread t = aggregatorThread;
+      if (t != null) {
+        LockSupport.unpark(t);
       }
     }
-    if (hitCount == 0) {
-      return Collections.emptyList();
-    }
-    if (hitCount == 1) {
-      return Collections.singletonList(encodePeerTag(names[firstHit], values[firstHit]));
+  }
+
+  private static void handleSnapshot(Aggregator agg, SpanSnapshot snapshot) {
+    // Skip-sentinel from the producer's overclaim path: the producer claimed this slot
+    // speculatively (one slot per span in the trace) but the span turned out to be non-eligible
+    // or the trace hit an ignored-resource. shouldComputeMetric guarantees duration > 0 for any
+    // real publish, so tagAndDuration == 0 uniquely identifies a skip.
+    if (snapshot.tagAndDuration == 0L) {
+      return;
     }
-    List<UTF8BytesString> tags = new ArrayList<>(hitCount);
-    for (int i = firstHit; i < n; i++) {
-      if (values[i] != null) {
-        tags.add(encodePeerTag(names[i], values[i]));
-      }
+    AggregateEntry entry = agg.aggregates.findOrInsert(snapshot);
+    if (entry != null) {
+      entry.recordOneDuration(snapshot.tagAndDuration);
+      agg.dirty = true;
+    } else {
+      // table at cap with no stale entry available to evict
+      agg.healthMetrics.onStatsAggregateDropped();
     }
-    return tags;
   }
 
-  private static UTF8BytesString encodePeerTag(String name, String value) {
-    final Pair<DDCache<String, UTF8BytesString>, Function<String, UTF8BytesString>>
-        cacheAndCreator = PEER_TAGS_CACHE.computeIfAbsent(name, PEER_TAGS_CACHE_ADDER);
-    return cacheAndCreator.getLeft().computeIfAbsent(value, cacheAndCreator.getRight());
+  private void handleSignal(SignalItem signal) {
+    if (signal == ClearSignal.CLEAR) {
+      // ClearSignal is routed through the signal channel (rather than letting the caller mutate
+      // AggregateTable directly) so the aggregator thread stays the sole writer. AggregateTable
+      // is not thread-safe; a direct clear() from e.g. the OkHttpSink callback thread would race
+      // with the data-drain on this thread.
+      //
+      // We do NOT clear the snapshot data ring here. Any in-flight snapshots will drain
+      // naturally into the just-cleared table, get re-aggregated, and flushed on the next
+      // report -- where the agent rejects them again, triggering another DOWNGRADED -> disable()
+      // -> CLEAR cycle. Worst case: one extra reporting cycle of wasted work.
+      aggregates.clear();
+      // Clear dirty too -- without this, the next report() would see dirty=true, run
+      // expungeStaleAggregates against the (now-empty) table, find isEmpty()=true, and skip
+      // the flush anyway. Same observable outcome, but resetting here keeps the invariant
+      // "dirty implies there's data to flush" honest.
+      dirty = false;
+      signal.complete();
+    } else {
+      // STOP or REPORT: catch up on any data that arrived between the loop's main drain and the
+      // signal being enqueued. Producer's signalInbox.offer happens-after its dataInbox publish,
+      // so by the time we observe the signal, every snapshot the producer had published is
+      // visible to drain() here. This is what gives report() bucket-boundary determinism.
+      dataInbox.drain(this, SNAPSHOT_HANDLER);
+      report(wallClockTime(), signal);
+      if (signal instanceof StopSignal) {
+        stopped = true;
+        signal.complete();
+      }
+    }
   }
 
   private void report(long when, SignalItem signal) {
@@ -234,14 +228,16 @@ private void report(long when, SignalItem signal) {
     boolean skipped = true;
     if (dirty) {
       try {
-        expungeStaleAggregates();
+        aggregates.expungeStaleAggregates();
         if (!aggregates.isEmpty()) {
           skipped = false;
           writer.startBucket(aggregates.size(), when, reportingIntervalNanos);
-          for (Map.Entry<MetricKey, AggregateMetric> aggregate : aggregates.entrySet()) {
-            writer.add(aggregate.getKey(), aggregate.getValue());
-            aggregate.getValue().clear();
-          }
+          aggregates.forEach(
+              writer,
+              (w, entry) -> {
+                w.add(entry);
+                entry.clear();
+              });
           // note that this may do IO and block
           writer.finishBucket();
         }
@@ -257,17 +253,6 @@ private void report(long when, SignalItem signal) {
     }
   }
 
-  private void expungeStaleAggregates() {
-    Iterator<Map.Entry<MetricKey, AggregateMetric>> it = aggregates.entrySet().iterator();
-    while (it.hasNext()) {
-      Map.Entry<MetricKey, AggregateMetric> pair = it.next();
-      AggregateMetric metric = pair.getValue();
-      if (metric.getHitCount() == 0) {
-        it.remove();
-      }
-    }
-  }
-
   private long wallClockTime() {
     return MILLISECONDS.toNanos(System.currentTimeMillis());
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java
index dc5d698bcc1..dc6ea86bac7 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java
@@ -5,8 +5,9 @@
 import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_ENDPOINT;
 import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_METHOD;
 import static datadog.trace.bootstrap.instrumentation.api.Tags.SPAN_KIND;
-import static datadog.trace.common.metrics.AggregateMetric.ERROR_TAG;
-import static datadog.trace.common.metrics.AggregateMetric.TOP_LEVEL_TAG;
+import static datadog.trace.common.metrics.AggregateEntry.ERROR_TAG;
+import static datadog.trace.common.metrics.AggregateEntry.TOP_LEVEL_TAG;
+import static datadog.trace.common.metrics.SignalItem.ClearSignal.CLEAR;
 import static datadog.trace.common.metrics.SignalItem.ReportSignal.REPORT;
 import static datadog.trace.common.metrics.SignalItem.StopSignal.STOP;
 import static datadog.trace.util.AgentThreadFactory.AgentThread.METRICS_AGGREGATOR;
@@ -18,12 +19,8 @@
 import datadog.communication.ddagent.DDAgentFeaturesDiscovery;
 import datadog.communication.ddagent.SharedCommunicationObjects;
 import datadog.trace.api.Config;
-import datadog.trace.api.Pair;
 import datadog.trace.api.WellKnownTags;
-import datadog.trace.api.cache.DDCache;
-import datadog.trace.api.cache.DDCaches;
 import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags;
-import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
 import datadog.trace.common.metrics.SignalItem.ReportSignal;
 import datadog.trace.common.writer.ddagent.DDAgentApi;
 import datadog.trace.core.CoreSpan;
@@ -31,6 +28,7 @@
 import datadog.trace.core.SpanKindFilter;
 import datadog.trace.core.monitor.HealthMetrics;
 import datadog.trace.util.AgentTaskScheduler;
+import datadog.trace.util.concurrent.MpscRingBuffer;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -39,7 +37,6 @@
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-import java.util.function.Function;
 import org.jctools.queues.MessagePassingQueue;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -51,22 +48,6 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve
   private static final Map<String, String> DEFAULT_HEADERS =
       Collections.singletonMap(DDAgentApi.DATADOG_META_TRACER_VERSION, DDTraceCoreInfo.VERSION);
 
-  static final DDCache<String, UTF8BytesString> SERVICE_NAMES = DDCaches.newFixedSizeCache(32);
-
-  static final DDCache<CharSequence, UTF8BytesString> SPAN_KINDS = DDCaches.newFixedSizeCache(16);
-  static final DDCache<
-          String, Pair<DDCache<String, UTF8BytesString>, Function<String, UTF8BytesString>>>
-      PEER_TAGS_CACHE =
-          DDCaches.newFixedSizeCache(
-              64); // it can be unbounded since those values are returned by the agent and should be
-  // under control. 64 entries is enough in this case to contain all the peer tags.
-  static final Function<
-          String, Pair<DDCache<String, UTF8BytesString>, Function<String, UTF8BytesString>>>
-      PEER_TAGS_CACHE_ADDER =
-          key ->
-              Pair.of(
-                  DDCaches.newFixedSizeCache(512),
-                  value -> UTF8BytesString.create(key + ":" + value));
   private static final CharSequence SYNTHETICS_ORIGIN = "synthetics";
 
   private static final SpanKindFilter METRICS_ELIGIBLE_KINDS =
@@ -83,9 +64,27 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve
   private static final SpanKindFilter INTERNAL_KIND =
       SpanKindFilter.builder().includeInternal().build();
 
+  /** Capacity of the small MPSC queue carrying control signals (REPORT / STOP / CLEAR). */
+  private static final int SIGNAL_INBOX_CAPACITY = 16;
+
   private final Set<String> ignoredResources;
   private final Thread thread;
-  private final MessagePassingQueue<InboxItem> inbox;
+
+  /**
+   * Producer/consumer data channel: a {@link MpscRingBuffer} of pre-allocated, recyclable {@link
+   * SpanSnapshot} slots. {@link #publish(List)} claims a range of slots in one CAS via {@link
+   * MpscRingBuffer#tryClaimRange} and fills each in place via {@link #fillSlot} -- no per-publish
+   * allocation.
+   */
+  private final MpscRingBuffer<SpanSnapshot> dataInbox;
+
+  /**
+   * Separate small queue for control signals. Kept off the data ring because the ring only holds
+   * one element type (the slot), and routing signals through a side channel lets the aggregator
+   * service them ahead of the data backlog each loop iteration.
+   */
+  private final MessagePassingQueue<SignalItem> signalInbox;
+
   private final Sink sink;
   private final Aggregator aggregator;
   private final long reportingInterval;
@@ -193,14 +192,16 @@ public ConflatingMetricsAggregator(
       boolean includeEndpointInMetrics) {
     this.ignoredResources = ignoredResources;
     this.includeEndpointInMetrics = includeEndpointInMetrics;
-    this.inbox = Queues.mpscArrayQueue(queueSize);
+    this.dataInbox = new MpscRingBuffer<>(SpanSnapshot::new, queueSize);
+    this.signalInbox = Queues.mpscArrayQueue(SIGNAL_INBOX_CAPACITY);
     this.features = features;
     this.healthMetrics = healthMetric;
     this.sink = sink;
     this.aggregator =
         new Aggregator(
             metricWriter,
-            inbox,
+            dataInbox,
+            signalInbox,
             maxAggregates,
             reportingInterval,
             timeUnit,
@@ -238,11 +239,11 @@ public boolean report() {
     boolean published;
     int attempts = 0;
     do {
-      published = inbox.offer(REPORT);
+      published = signalInbox.offer(REPORT);
       ++attempts;
     } while (!published && attempts < 10);
     if (!published) {
-      log.debug("Skipped metrics reporting because the queue is full");
+      log.debug("Skipped metrics reporting because the signal queue is full");
     }
     return published;
   }
@@ -265,7 +266,7 @@ public Future<Boolean> forceReport() {
     ReportSignal reportSignal = new ReportSignal();
     boolean published = false;
     while (thread.isAlive() && !published) {
-      published = inbox.offer(reportSignal);
+      published = signalInbox.offer(reportSignal);
       if (!published) {
         try {
           Thread.sleep(10);
@@ -284,27 +285,81 @@ public Future<Boolean> forceReport() {
 
   @Override
   public boolean publish(List<? extends CoreSpan<?>> trace) {
+    if (!features.supportsMetrics()) {
+      return false;
+    }
+    final int traceSize = trace.size();
+    if (traceSize == 0) {
+      return false;
+    }
+
+    // Overclaim the worst case: assume every span in the trace is metrics-eligible and claim
+    // that many ring slots in a single CAS. Non-eligible spans get a "skip" sentinel
+    // (tagAndDuration = 0L); the aggregator recognises this and bypasses findOrInsert for those
+    // slots. We expect most spans in a typical trace to be eligible (top-level / measured /
+    // server-client-producer-consumer kinds), so the slot waste is small in exchange for a
+    // single-pass producer loop. Indexed iteration (vs. enhanced-for) avoids per-call Iterator
+    // allocation -- trace is typically a SpanList, but even Collections.singletonList's
+    // iterator() allocates.
+    final long startSeq = dataInbox.tryClaimRange(traceSize);
+    if (startSeq < 0L) {
+      healthMetrics.onStatsInboxFull();
+      return false;
+    }
+
     boolean forceKeep = false;
-    int counted = 0;
-    if (features.supportsMetrics()) {
-      for (CoreSpan<?> span : trace) {
-        boolean isTopLevel = span.isTopLevel();
-        if (shouldComputeMetric(span, isTopLevel)) {
-          final CharSequence resourceName = span.getResourceName();
-          if (resourceName != null && ignoredResources.contains(resourceName.toString())) {
-            // skip publishing all children
-            forceKeep = false;
-            break;
+    int filled = 0;
+
+    for (int i = 0; i < traceSize; i++) {
+      final CoreSpan<?> span = trace.get(i);
+      final long seq = startSeq + i;
+      final SpanSnapshot slot = dataInbox.slotAt(seq);
+      final boolean isTopLevel = span.isTopLevel();
+
+      if (shouldComputeMetric(span, isTopLevel)) {
+        final CharSequence resourceName = span.getResourceName();
+        if (resourceName != null && ignoredResources.contains(resourceName.toString())) {
+          // Ignored resource: drop metrics for the whole trace. Mark this slot AND every
+          // remaining slot in the claimed range as skip-sentinel so the aggregator advances past
+          // them.
+          slot.tagAndDuration = 0L;
+          dataInbox.publish(seq);
+          for (int j = i + 1; j < traceSize; j++) {
+            final long skipSeq = startSeq + j;
+            dataInbox.slotAt(skipSeq).tagAndDuration = 0L;
+            dataInbox.publish(skipSeq);
           }
-          counted++;
-          forceKeep |= publish(span, isTopLevel);
+          healthMetrics.onClientStatTraceComputed(0, traceSize, true);
+          aggregator.unparkIfWaiting();
+          return false;
+        }
+        try {
+          fillSlot(span, isTopLevel, slot);
+        } finally {
+          // Publish in finally so a throwing fillSlot can't strand the consumer at this
+          // sequence -- matches the publish-on-throw guarantee in MpscRingBuffer.tryWrite.
+          dataInbox.publish(seq);
         }
+        forceKeep |= span.getError() > 0;
+        filled++;
+      } else {
+        // Non-eligible span: mark slot as skip-sentinel and publish so the aggregator advances.
+        slot.tagAndDuration = 0L;
+        dataInbox.publish(seq);
       }
-      healthMetrics.onClientStatTraceComputed(counted, trace.size(), !forceKeep);
     }
+
+    healthMetrics.onClientStatTraceComputed(filled, traceSize, !forceKeep);
+    // Wake the aggregator if it parked while we were filling. Producer-side cost is one volatile
+    // read when the aggregator isn't parked.
+    aggregator.unparkIfWaiting();
     return forceKeep;
   }
 
+  // Other site that posts work the aggregator needs to see: ignored-resource short-circuit
+  // returns above before reaching unparkIfWaiting. Mirror that wake-up there too so the
+  // aggregator picks up the skip-sentinel slots quickly.
+
   private boolean shouldComputeMetric(CoreSpan<?> span, boolean isTopLevel) {
     return (span.isMeasured() || isTopLevel || span.isKind(METRICS_ELIGIBLE_KINDS))
         && span.getLongRunningVersion()
@@ -312,21 +367,16 @@ private boolean shouldComputeMetric(CoreSpan<?> span, boolean isTopLevel) {
         && span.getDurationNano() > 0;
   }
 
-  private boolean publish(CoreSpan<?> span, boolean isTopLevel) {
-    // Error decision drives force-keep sampling regardless of whether the snapshot gets queued.
-    boolean error = span.getError() > 0;
-
-    // Fast-path the inbox-full case before any tag extraction or snapshot allocation. size() is
-    // approximate on jctools' MPSC queue but that's fine: if we under-estimate, we fall through
-    // and let inbox.offer be the source of truth (existing behavior); if we over-estimate, we
-    // drop a snapshot that would have fit -- acceptable, onStatsInboxFull was going to fire
-    // imminently anyway.
-    if (inbox.size() >= inbox.capacity()) {
-      healthMetrics.onStatsInboxFull();
-      return error;
-    }
+  /**
+   * Producer-side slot fill. Called between a {@link MpscRingBuffer#tryClaimRange} and the matching
+   * {@link MpscRingBuffer#publish}: reads from {@code span} and instance state, writes every field
+   * on {@code slot} (including {@code null} where applicable) so stale values from a prior occupant
+   * of this slot don't bleed through.
+   */
+  private void fillSlot(CoreSpan<?> span, boolean isTopLevel, SpanSnapshot slot) {
+    final boolean error = span.getError() > 0;
 
-    // Extract HTTP method and endpoint only if the feature is enabled
+    // Extract HTTP method and endpoint only if the feature is enabled.
     String httpMethod = null;
     String httpEndpoint = null;
     if (includeEndpointInMetrics) {
@@ -349,37 +399,63 @@ private boolean publish(CoreSpan<?> span, boolean isTopLevel) {
     long tagAndDuration =
         span.getDurationNano() | (error ? ERROR_TAG : 0L) | (isTopLevel ? TOP_LEVEL_TAG : 0L);
 
+    // Fill the slot's reusable peerTagValues scratch in place. The slot owns its array across
+    // publishes; we only allocate when the slot has none or the schema's length changed. When
+    // no tags fire, leave the scratch in place and clear peerTagSchema -- consumers gate on
+    // peerTagSchema (see AggregateEntry.matches / hashOf), so stale scratch contents are inert.
     PeerTagSchema peerTagSchema = peerTagSchemaFor(span);
-    String[] peerTagValues =
-        peerTagSchema == null ? null : capturePeerTagValues(span, peerTagSchema);
-    if (peerTagValues == null) {
-      // No tags fired -- drop the schema reference so the consumer doesn't bother iterating an
-      // all-null array.
-      peerTagSchema = null;
+    if (peerTagSchema != null) {
+      final String[] names = peerTagSchema.names;
+      final int n = names.length;
+      String[] valuesArr = slot.peerTagValues;
+      if (valuesArr == null || valuesArr.length != n) {
+        valuesArr = new String[n];
+        slot.peerTagValues = valuesArr;
+      }
+      boolean anyHit = false;
+      for (int i = 0; i < n; i++) {
+        Object v = span.unsafeGetTag(names[i]);
+        String tag = v == null ? null : v.toString();
+        valuesArr[i] = tag;
+        anyHit |= (tag != null);
+      }
+      if (!anyHit) {
+        // Schema fired no tags -- treat like the no-peer-tag case. Keep valuesArr alive on the
+        // slot for the next publish; nullifying peerTagSchema makes downstream gates skip it.
+        peerTagSchema = null;
+      }
     }
 
-    SpanSnapshot snapshot =
-        new SpanSnapshot(
-            span.getResourceName(),
-            span.getServiceName(),
-            span.getOperationName(),
-            span.getServiceNameSource(),
-            spanType,
-            span.getHttpStatusCode(),
-            isSynthetic(span),
-            span.getParentId() == 0,
-            spanKind,
-            peerTagSchema,
-            peerTagValues,
-            httpMethod,
-            httpEndpoint,
-            grpcStatusCode,
-            tagAndDuration);
-    if (!inbox.offer(snapshot)) {
-      healthMetrics.onStatsInboxFull();
-    }
-    // force keep keys if there are errors
-    return error;
+    // Canonicalize every string field to UTF8BytesString via the per-field DDCaches on
+    // AggregateEntry. This work used to happen on the aggregator thread; doing it here
+    // distributes the cost across producer threads and gives the aggregator's matches() identity
+    // comparisons instead of contentEquals chains.
+    slot.resourceName =
+        AggregateEntry.canonicalize(AggregateEntry.RESOURCE_CACHE, span.getResourceName());
+    slot.serviceName =
+        AggregateEntry.canonicalize(AggregateEntry.SERVICE_CACHE, span.getServiceName());
+    slot.operationName =
+        AggregateEntry.canonicalize(AggregateEntry.OPERATION_CACHE, span.getOperationName());
+    slot.serviceNameSource =
+        AggregateEntry.canonicalizeOptional(
+            AggregateEntry.SERVICE_SOURCE_CACHE, span.getServiceNameSource());
+    slot.spanType = AggregateEntry.canonicalize(AggregateEntry.TYPE_CACHE, spanType);
+    slot.httpStatusCode = span.getHttpStatusCode();
+    slot.synthetic = isSynthetic(span);
+    slot.traceRoot = span.getParentId() == 0;
+    slot.spanKind = AggregateEntry.canonicalize(AggregateEntry.SPAN_KIND_CACHE, spanKind);
+    slot.peerTagSchema = peerTagSchema;
+    slot.httpMethod =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_METHOD_CACHE, httpMethod);
+    slot.httpEndpoint =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_ENDPOINT_CACHE, httpEndpoint);
+    slot.grpcStatusCode =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.GRPC_STATUS_CODE_CACHE, grpcStatusCode);
+    slot.tagAndDuration = tagAndDuration;
+
+    // Precompute the table-lookup key hash on the producer side. AggregateTable#findOrInsert
+    // reads this directly instead of running the chained hashOf on the aggregator thread.
+    SpanSnapshot.computeAndSetKeyHash(slot);
   }
 
   /**
@@ -464,28 +540,6 @@ private void reconcilePeerTagSchema() {
     }
   }
 
-  /**
-   * Captures the span's peer-tag values into a {@code String[]} parallel to {@code schema.names}.
-   * Slots remain {@code null} for tags the span didn't set; the array itself is lazily allocated on
-   * the first hit so spans that fire no peer tags pay zero allocation. Returns {@code null} when
-   * none of the configured peer tags are set on the span.
-   */
-  private static String[] capturePeerTagValues(CoreSpan<?> span, PeerTagSchema schema) {
-    String[] names = schema.names;
-    int n = names.length;
-    String[] values = null;
-    for (int i = 0; i < n; i++) {
-      Object v = span.unsafeGetTag(names[i]);
-      if (v != null) {
-        if (values == null) {
-          values = new String[n];
-        }
-        values[i] = v.toString();
-      }
-    }
-    return values;
-  }
-
   private static boolean isSynthetic(CoreSpan<?> span) {
     return span.getOrigin() != null && SYNTHETICS_ORIGIN.equals(span.getOrigin().toString());
   }
@@ -494,7 +548,7 @@ public void stop() {
     if (null != cancellation) {
       cancellation.cancel();
     }
-    inbox.offer(STOP);
+    signalInbox.offer(STOP);
   }
 
   @Override
@@ -532,8 +586,18 @@ private void disable() {
     features.discover();
     if (!features.supportsMetrics()) {
       log.debug("Disabling metric reporting because an agent downgrade was detected");
-      this.inbox.clear();
-      this.aggregator.clearAggregates();
+      // Route the clear through the signal channel so the aggregator thread is the only writer.
+      // AggregateTable is not thread-safe; mutating it directly from this thread would race
+      // with snapshot processing on the aggregator thread.
+      //
+      // Best-effort single offer rather than the retry-loop pattern in report(). If the signal
+      // queue is full at downgrade time the clear is dropped, but the system self-heals:
+      // features.discover() already flipped supportsMetrics() false, so producer publish() calls
+      // now skip the data ring; the aggregator drains existing snapshots and ships them on the
+      // next report cycle; the sink rejects that payload and fires DOWNGRADED again, which
+      // retries disable() against a now-empty signal queue. Worst case: one extra reporting
+      // cycle of stale data.
+      signalInbox.offer(CLEAR);
     }
   }
 
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/InboxItem.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/InboxItem.java
index 7d66cad6a15..e7c37f91768 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/InboxItem.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/InboxItem.java
@@ -4,6 +4,17 @@
 
 interface InboxItem {}
 
+/**
+ * Inbox-routed control message. Each subclass exposes a process-wide {@code static final} singleton
+ * ({@link StopSignal#STOP}, {@link ReportSignal#REPORT}, {@link ClearSignal#CLEAR}) for the common
+ * fire-and-forget case and is also directly instantiable when a caller needs to await handling.
+ *
+ * <p><b>Singletons are fire-and-forget.</b> The inherited {@link #future} is completed on first
+ * handling by the aggregator thread and never reset, so a second posting of the same singleton
+ * cannot signal completion to a fresh awaiter -- the future is already done. Callers that want
+ * one-shot completion semantics (e.g. {@code forceReport()}) must allocate a fresh instance ({@code
+ * new ReportSignal()}) rather than reusing the singleton.
+ */
 abstract class SignalItem implements InboxItem {
   final CompletableFuture<Boolean> future;
 
@@ -20,12 +31,26 @@ void ignore() {
   }
 
   static final class StopSignal extends SignalItem {
+    /** Fire-and-forget singleton. See class-level note on {@link SignalItem}. */
     static final StopSignal STOP = new StopSignal();
 
     private StopSignal() {}
   }
 
   static final class ReportSignal extends SignalItem {
+    /** Fire-and-forget singleton; {@code forceReport()} allocates fresh instances. */
     static final ReportSignal REPORT = new ReportSignal();
   }
+
+  /**
+   * Posted from arbitrary threads (e.g. the Sink event thread during agent downgrade) so the
+   * aggregator thread is the one that actually performs the table reset. Keeps {@link
+   * AggregateTable} and {@code inbox.clear()} single-writer.
+   */
+  static final class ClearSignal extends SignalItem {
+    /** Fire-and-forget singleton. See class-level note on {@link SignalItem}. */
+    static final ClearSignal CLEAR = new ClearSignal();
+
+    private ClearSignal() {}
+  }
 }
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricKey.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricKey.java
deleted file mode 100644
index 9e2e2098d1f..00000000000
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricKey.java
+++ /dev/null
@@ -1,178 +0,0 @@
-package datadog.trace.common.metrics;
-
-import static datadog.trace.bootstrap.instrumentation.api.UTF8BytesString.EMPTY;
-
-import datadog.trace.api.cache.DDCache;
-import datadog.trace.api.cache.DDCaches;
-import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
-import datadog.trace.util.HashingUtils;
-import java.util.Collections;
-import java.util.List;
-import java.util.Objects;
-
-/** The aggregation key for tracked metrics. */
-public final class MetricKey {
-  static final DDCache<String, UTF8BytesString> RESOURCE_CACHE = DDCaches.newFixedSizeCache(32);
-  static final DDCache<String, UTF8BytesString> SERVICE_CACHE = DDCaches.newFixedSizeCache(8);
-  static final DDCache<String, UTF8BytesString> SERVICE_SOURCE_CACHE =
-      DDCaches.newFixedSizeCache(16);
-  static final DDCache<String, UTF8BytesString> OPERATION_CACHE = DDCaches.newFixedSizeCache(64);
-  static final DDCache<String, UTF8BytesString> TYPE_CACHE = DDCaches.newFixedSizeCache(8);
-  static final DDCache<String, UTF8BytesString> KIND_CACHE = DDCaches.newFixedSizeCache(8);
-  static final DDCache<String, UTF8BytesString> HTTP_METHOD_CACHE = DDCaches.newFixedSizeCache(8);
-  static final DDCache<String, UTF8BytesString> HTTP_ENDPOINT_CACHE =
-      DDCaches.newFixedSizeCache(32);
-  static final DDCache<String, UTF8BytesString> GRPC_STATUS_CODE_CACHE =
-      DDCaches.newFixedSizeCache(32);
-
-  private final UTF8BytesString resource;
-  private final UTF8BytesString service;
-  private final UTF8BytesString serviceSource;
-  private final UTF8BytesString operationName;
-  private final UTF8BytesString type;
-  private final int httpStatusCode;
-  private final boolean synthetics;
-  private final int hash;
-  private final boolean isTraceRoot;
-  private final UTF8BytesString spanKind;
-  private final List<UTF8BytesString> peerTags;
-  private final UTF8BytesString httpMethod;
-  private final UTF8BytesString httpEndpoint;
-  private final UTF8BytesString grpcStatusCode;
-
-  public MetricKey(
-      CharSequence resource,
-      CharSequence service,
-      CharSequence operationName,
-      CharSequence serviceSource,
-      CharSequence type,
-      int httpStatusCode,
-      boolean synthetics,
-      boolean isTraceRoot,
-      CharSequence spanKind,
-      List<UTF8BytesString> peerTags,
-      CharSequence httpMethod,
-      CharSequence httpEndpoint,
-      CharSequence grpcStatusCode) {
-    this.resource = null == resource ? EMPTY : utf8(RESOURCE_CACHE, resource);
-    this.service = null == service ? EMPTY : utf8(SERVICE_CACHE, service);
-    this.serviceSource = null == serviceSource ? null : utf8(SERVICE_SOURCE_CACHE, serviceSource);
-    this.operationName = null == operationName ? EMPTY : utf8(OPERATION_CACHE, operationName);
-    this.type = null == type ? EMPTY : utf8(TYPE_CACHE, type);
-    this.httpStatusCode = httpStatusCode;
-    this.synthetics = synthetics;
-    this.isTraceRoot = isTraceRoot;
-    this.spanKind = null == spanKind ? EMPTY : utf8(KIND_CACHE, spanKind);
-    this.peerTags = peerTags == null ? Collections.emptyList() : peerTags;
-    this.httpMethod = httpMethod == null ? null : utf8(HTTP_METHOD_CACHE, httpMethod);
-    this.httpEndpoint = httpEndpoint == null ? null : utf8(HTTP_ENDPOINT_CACHE, httpEndpoint);
-    this.grpcStatusCode =
-        grpcStatusCode == null ? null : utf8(GRPC_STATUS_CODE_CACHE, grpcStatusCode);
-
-    int tmpHash = 0;
-    tmpHash = HashingUtils.addToHash(tmpHash, this.isTraceRoot);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.spanKind);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.peerTags);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.resource);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.service);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.operationName);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.type);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.httpStatusCode);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.synthetics);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.serviceSource);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.httpEndpoint);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.httpMethod);
-    tmpHash = HashingUtils.addToHash(tmpHash, this.grpcStatusCode);
-    this.hash = tmpHash;
-  }
-
-  static UTF8BytesString utf8(DDCache<String, UTF8BytesString> cache, CharSequence charSeq) {
-    if (charSeq instanceof UTF8BytesString) {
-      return (UTF8BytesString) charSeq;
-    } else {
-      return cache.computeIfAbsent(charSeq.toString(), UTF8BytesString::create);
-    }
-  }
-
-  public UTF8BytesString getResource() {
-    return resource;
-  }
-
-  public UTF8BytesString getService() {
-    return service;
-  }
-
-  public UTF8BytesString getServiceSource() {
-    return serviceSource;
-  }
-
-  public UTF8BytesString getOperationName() {
-    return operationName;
-  }
-
-  public UTF8BytesString getType() {
-    return type;
-  }
-
-  public int getHttpStatusCode() {
-    return httpStatusCode;
-  }
-
-  public boolean isSynthetics() {
-    return synthetics;
-  }
-
-  public boolean isTraceRoot() {
-    return isTraceRoot;
-  }
-
-  public UTF8BytesString getSpanKind() {
-    return spanKind;
-  }
-
-  public List<UTF8BytesString> getPeerTags() {
-    return peerTags;
-  }
-
-  public UTF8BytesString getHttpMethod() {
-    return httpMethod;
-  }
-
-  public UTF8BytesString getHttpEndpoint() {
-    return httpEndpoint;
-  }
-
-  public UTF8BytesString getGrpcStatusCode() {
-    return grpcStatusCode;
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    }
-    if ((o instanceof MetricKey)) {
-      MetricKey metricKey = (MetricKey) o;
-      return hash == metricKey.hash
-          && synthetics == metricKey.synthetics
-          && httpStatusCode == metricKey.httpStatusCode
-          && resource.equals(metricKey.resource)
-          && service.equals(metricKey.service)
-          && operationName.equals(metricKey.operationName)
-          && type.equals(metricKey.type)
-          && isTraceRoot == metricKey.isTraceRoot
-          && spanKind.equals(metricKey.spanKind)
-          && peerTags.equals(metricKey.peerTags)
-          && Objects.equals(serviceSource, metricKey.serviceSource)
-          && Objects.equals(httpMethod, metricKey.httpMethod)
-          && Objects.equals(httpEndpoint, metricKey.httpEndpoint)
-          && Objects.equals(grpcStatusCode, metricKey.grpcStatusCode);
-    }
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    return hash;
-  }
-}
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricWriter.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricWriter.java
index fa26ed2e5db..905ba498760 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricWriter.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricWriter.java
@@ -3,7 +3,11 @@
 public interface MetricWriter {
   void startBucket(int metricCount, long start, long duration);
 
-  void add(MetricKey key, AggregateMetric aggregate);
+  /**
+   * Serialize one aggregate. The {@link AggregateEntry} carries both the label fields (resource,
+   * service, span.kind, peer tags, etc.) and the counters being reported.
+   */
+  void add(AggregateEntry entry);
 
   void finishBucket();
 
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java
index 4821d1b33a4..d3a3d47d65a 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java
@@ -43,6 +43,14 @@ final class PeerTagSchema {
 
   final String[] names;
 
+  /**
+   * Precomputed {@code Arrays.hashCode(names)}. The schema is shared across many publishes so
+   * recomputing it on the aggregator hot path (per-publish call to {@code AggregateEntry.hashOf})
+   * was waste -- it showed up as a top aggregator-thread sample. Cached here, computed once at
+   * construction.
+   */
+  final int namesHash;
+
   /**
    * The {@code DDAgentFeaturesDiscovery.state()} hash this schema was built from. The aggregator
    * thread reads and updates this once per reporting cycle when reconciling against the latest
@@ -54,6 +62,7 @@ final class PeerTagSchema {
 
   private PeerTagSchema(String[] names, String state) {
     this.names = names;
+    this.namesHash = java.util.Arrays.hashCode(names);
     this.state = state;
   }
 
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java
index 0f84964e9db..c9fb15b4d0c 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SerializingMetricWriter.java
@@ -142,12 +142,12 @@ public void startBucket(int metricCount, long start, long duration) {
   }
 
   @Override
-  public void add(MetricKey key, AggregateMetric aggregate) {
+  public void add(AggregateEntry entry) {
     // Calculate dynamic map size based on optional fields
-    final boolean hasHttpMethod = key.getHttpMethod() != null;
-    final boolean hasHttpEndpoint = key.getHttpEndpoint() != null;
-    final boolean hasServiceSource = key.getServiceSource() != null;
-    final boolean hasGrpcStatusCode = key.getGrpcStatusCode() != null;
+    final boolean hasHttpMethod = entry.getHttpMethod() != null;
+    final boolean hasHttpEndpoint = entry.getHttpEndpoint() != null;
+    final boolean hasServiceSource = entry.getServiceSource() != null;
+    final boolean hasGrpcStatusCode = entry.getGrpcStatusCode() != null;
     final int mapSize =
         15
             + (hasServiceSource ? 1 : 0)
@@ -158,31 +158,31 @@ public void add(MetricKey key, AggregateMetric aggregate) {
     writer.startMap(mapSize);
 
     writer.writeUTF8(NAME);
-    writer.writeUTF8(key.getOperationName());
+    writer.writeUTF8(entry.getOperationName());
 
     writer.writeUTF8(SERVICE);
-    writer.writeUTF8(key.getService());
+    writer.writeUTF8(entry.getService());
 
     writer.writeUTF8(RESOURCE);
-    writer.writeUTF8(key.getResource());
+    writer.writeUTF8(entry.getResource());
 
     writer.writeUTF8(TYPE);
-    writer.writeUTF8(key.getType());
+    writer.writeUTF8(entry.getType());
 
     writer.writeUTF8(HTTP_STATUS_CODE);
-    writer.writeInt(key.getHttpStatusCode());
+    writer.writeInt(entry.getHttpStatusCode());
 
     writer.writeUTF8(SYNTHETICS);
-    writer.writeBoolean(key.isSynthetics());
+    writer.writeBoolean(entry.isSynthetics());
 
     writer.writeUTF8(IS_TRACE_ROOT);
-    writer.writeInt(key.isTraceRoot() ? TRISTATE_TRUE : TRISTATE_FALSE);
+    writer.writeInt(entry.isTraceRoot() ? TRISTATE_TRUE : TRISTATE_FALSE);
 
     writer.writeUTF8(SPAN_KIND);
-    writer.writeUTF8(key.getSpanKind());
+    writer.writeUTF8(entry.getSpanKind());
 
     writer.writeUTF8(PEER_TAGS);
-    final List<UTF8BytesString> peerTags = key.getPeerTags();
+    final List<UTF8BytesString> peerTags = entry.getPeerTags();
     writer.startArray(peerTags.size());
 
     for (UTF8BytesString peerTag : peerTags) {
@@ -191,43 +191,68 @@ public void add(MetricKey key, AggregateMetric aggregate) {
 
     if (hasServiceSource) {
       writer.writeUTF8(SERVICE_SOURCE);
-      writer.writeUTF8(key.getServiceSource());
+      writer.writeUTF8(entry.getServiceSource());
     }
     // Only include HTTPMethod if present
     if (hasHttpMethod) {
       writer.writeUTF8(HTTP_METHOD);
-      writer.writeUTF8(key.getHttpMethod());
+      writer.writeUTF8(entry.getHttpMethod());
     }
 
     // Only include HTTPEndpoint if present
     if (hasHttpEndpoint) {
       writer.writeUTF8(HTTP_ENDPOINT);
-      writer.writeUTF8(key.getHttpEndpoint());
+      writer.writeUTF8(entry.getHttpEndpoint());
     }
 
     // Only include GRPCStatusCode if present (rpc-type spans)
     if (hasGrpcStatusCode) {
       writer.writeUTF8(GRPC_STATUS_CODE);
-      writer.writeUTF8(key.getGrpcStatusCode());
+      writer.writeUTF8(entry.getGrpcStatusCode());
     }
 
     writer.writeUTF8(HITS);
-    writer.writeInt(aggregate.getHitCount());
+    writer.writeInt(entry.getHitCount());
 
     writer.writeUTF8(ERRORS);
-    writer.writeInt(aggregate.getErrorCount());
+    writer.writeInt(entry.getErrorCount());
 
     writer.writeUTF8(TOP_LEVEL_HITS);
-    writer.writeInt(aggregate.getTopLevelCount());
+    writer.writeInt(entry.getTopLevelCount());
 
     writer.writeUTF8(DURATION);
-    writer.writeLong(aggregate.getDuration());
+    writer.writeLong(entry.getDuration());
 
     writer.writeUTF8(OK_SUMMARY);
-    writer.writeBinary(aggregate.getOkLatencies().serialize());
+    writer.writeBinary(entry.getOkLatencies().serialize());
 
     writer.writeUTF8(ERROR_SUMMARY);
-    writer.writeBinary(aggregate.getErrorLatencies().serialize());
+    final datadog.metrics.api.Histogram errorLatencies = entry.getErrorLatencies();
+    if (errorLatencies != null) {
+      writer.writeBinary(errorLatencies.serialize());
+    } else {
+      // Entry never saw an error; emit a cached empty-histogram payload so the wire format is
+      // unchanged without allocating a histogram per entry.
+      writer.writeBinary(emptyErrorHistogramBytes());
+    }
+  }
+
+  private byte[] emptyHistogramBytesCache;
+
+  /**
+   * Returns the cached serialized form of an empty histogram. Computed lazily on first call so the
+   * {@link datadog.metrics.api.Histograms} factory has been registered (by the producer-side tracer
+   * startup or test setup) before we sample its output.
+   */
+  private byte[] emptyErrorHistogramBytes() {
+    byte[] cached = emptyHistogramBytesCache;
+    if (cached == null) {
+      java.nio.ByteBuffer buf = datadog.metrics.api.Histogram.newHistogram().serialize();
+      cached = new byte[buf.remaining()];
+      buf.get(cached);
+      emptyHistogramBytesCache = cached;
+    }
+    return cached;
   }
 
   @Override
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java
index eb9b741cea6..f10d60167cc 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java
@@ -1,75 +1,118 @@
 package datadog.trace.common.metrics;
 
+import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
+import datadog.trace.util.LongHashingUtils;
+import java.util.Arrays;
+import javax.annotation.Nullable;
+
 /**
- * Immutable per-span value posted from the producer to the aggregator thread. Carries the raw
- * inputs the aggregator needs to build a {@link MetricKey} and update an {@link AggregateMetric}.
+ * Per-span value posted from the producer to the aggregator thread. Carries the canonical inputs
+ * the aggregator needs to look up or build an {@link AggregateEntry} and update its counters.
+ *
+ * <p>Fields are mutable so this class can serve as a slot in {@link
+ * datadog.trace.util.concurrent.MpscRingBuffer}: the producer claims a slot, writes its fields, and
+ * publishes; the aggregator reads them. There is exactly one writer per outstanding sequence (the
+ * producer that claimed it) and one reader (the aggregator), so no synchronization on fields is
+ * required.
  *
- * <p>All cache-canonicalization (service-name, span-kind, peer-tag string interning) happens on the
- * aggregator thread; the producer just shuffles references.
+ * <p><b>Producer-side canonicalization and hashing.</b> String fields are canonicalized to {@link
+ * UTF8BytesString} on the producer side (see {@link AggregateEntry#canonicalize}); same-content
+ * strings collapse to the same canonical reference through the per-field {@code DDCache} instances
+ * on {@link AggregateEntry}. The producer also precomputes {@link #keyHash} via the same chain the
+ * consumer used to use. The aggregator then does identity comparisons in {@link
+ * AggregateEntry#matches} and a precomputed-hash table lookup, with all the per-field work
+ * distributed across producer threads where there's CPU headroom.
  */
-final class SpanSnapshot implements InboxItem {
-
-  final CharSequence resourceName;
-  final String serviceName;
-  final CharSequence operationName;
-  final CharSequence serviceNameSource;
-  final CharSequence spanType;
-  final short httpStatusCode;
-  final boolean synthetic;
-  final boolean traceRoot;
-  final String spanKind;
+final class SpanSnapshot {
+
+  /** Canonical UTF-8 form of the span's resource name. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString resourceName;
+
+  /** Canonical UTF-8 form of the span's service. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString serviceName;
+
+  /** Canonical UTF-8 form of the span's operation name. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString operationName;
+
+  /** Canonical UTF-8 form, or {@code null} when the span had no service-name source set. */
+  @Nullable UTF8BytesString serviceNameSource;
+
+  /** Canonical UTF-8 form of the span's type. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString spanType;
+
+  short httpStatusCode;
+  boolean synthetic;
+  boolean traceRoot;
+
+  /** Canonical UTF-8 form of the span kind. {@link UTF8BytesString#EMPTY} when not set. */
+  UTF8BytesString spanKind;
 
   /**
    * Schema for {@link #peerTagValues}. {@code null} when the span has no peer tags. The schema
    * carries the names in parallel-array form; {@code peerTagValues} holds the per-span tag values
    * at the same indices.
    */
-  final PeerTagSchema peerTagSchema;
+  @Nullable PeerTagSchema peerTagSchema;
 
   /**
    * Peer tag values captured from the span, parallel to {@code peerTagSchema.names}. A {@code null}
-   * entry means the span didn't have that peer tag set. {@code null} (the whole array) when {@link
-   * #peerTagSchema} is {@code null}.
+   * entry means the span didn't have that peer tag set. These are per-publish strings (typically
+   * not shared across calls), so they stay as {@link String} -- canonicalizing them is more
+   * expensive than the aggregator's content-comparison on a 1-3 element array.
+   *
+   * <p>The slot owns this array as a reusable scratch buffer; see {@code
+   * ConflatingMetricsAggregator#fillSlot}.
    */
-  final String[] peerTagValues;
+  @Nullable String[] peerTagValues;
 
-  final String httpMethod;
-  final String httpEndpoint;
-  final String grpcStatusCode;
+  @Nullable UTF8BytesString httpMethod;
+  @Nullable UTF8BytesString httpEndpoint;
+  @Nullable UTF8BytesString grpcStatusCode;
 
   /** Duration in nanoseconds, OR-ed with {@code ERROR_TAG} / {@code TOP_LEVEL_TAG} as needed. */
-  final long tagAndDuration;
-
-  SpanSnapshot(
-      CharSequence resourceName,
-      String serviceName,
-      CharSequence operationName,
-      CharSequence serviceNameSource,
-      CharSequence spanType,
-      short httpStatusCode,
-      boolean synthetic,
-      boolean traceRoot,
-      String spanKind,
-      PeerTagSchema peerTagSchema,
-      String[] peerTagValues,
-      String httpMethod,
-      String httpEndpoint,
-      String grpcStatusCode,
-      long tagAndDuration) {
-    this.resourceName = resourceName;
-    this.serviceName = serviceName;
-    this.operationName = operationName;
-    this.serviceNameSource = serviceNameSource;
-    this.spanType = spanType;
-    this.httpStatusCode = httpStatusCode;
-    this.synthetic = synthetic;
-    this.traceRoot = traceRoot;
-    this.spanKind = spanKind;
-    this.peerTagSchema = peerTagSchema;
-    this.peerTagValues = peerTagValues;
-    this.httpMethod = httpMethod;
-    this.httpEndpoint = httpEndpoint;
-    this.grpcStatusCode = grpcStatusCode;
-    this.tagAndDuration = tagAndDuration;
+  long tagAndDuration;
+
+  /**
+   * Hashtable bucket key, computed by the producer at the end of {@code fillSlot} via {@link
+   * #computeAndSetKeyHash}. {@code AggregateTable#findOrInsert} uses this directly instead of
+   * re-running the chained hash on the aggregator thread.
+   */
+  long keyHash;
+
+  /** No-arg constructor used by {@link datadog.trace.util.concurrent.MpscRingBuffer}. */
+  SpanSnapshot() {}
+
+  /**
+   * Computes and stores {@link #keyHash} from the snapshot's current canonical fields. The producer
+   * calls this at the end of {@code fillSlot}; the aggregator then uses {@code keyHash} directly
+   * for bucket lookup in {@code AggregateTable}.
+   *
+   * <p>Mixing identical to the prior {@code AggregateEntry.hashOf} so the bucket distribution is
+   * unchanged: {@code UTF8BytesString.hashCode} returns the underlying {@code String} hash, so
+   * snapshots built from {@code String} and {@code UTF8BytesString} of the same content fold to the
+   * same long.
+   */
+  static void computeAndSetKeyHash(SpanSnapshot s) {
+    long h = 0;
+    h = LongHashingUtils.addToHash(h, s.resourceName);
+    h = LongHashingUtils.addToHash(h, s.serviceName);
+    h = LongHashingUtils.addToHash(h, s.operationName);
+    h = LongHashingUtils.addToHash(h, s.serviceNameSource);
+    h = LongHashingUtils.addToHash(h, s.spanType);
+    h = LongHashingUtils.addToHash(h, s.httpStatusCode);
+    h = LongHashingUtils.addToHash(h, s.synthetic);
+    h = LongHashingUtils.addToHash(h, s.traceRoot);
+    h = LongHashingUtils.addToHash(h, s.spanKind);
+    // peerTagValues is gated by peerTagSchema -- the slot's array is a reusable scratch buffer
+    // that may carry stale contents from a prior tag-firing publish when this publish had no
+    // peer tags. Hash it only when the schema says it's meaningful.
+    h = LongHashingUtils.addToHash(h, s.peerTagSchema == null ? 0 : s.peerTagSchema.namesHash);
+    h =
+        LongHashingUtils.addToHash(
+            h, s.peerTagSchema == null ? 0 : Arrays.hashCode(s.peerTagValues));
+    h = LongHashingUtils.addToHash(h, s.httpMethod);
+    h = LongHashingUtils.addToHash(h, s.httpEndpoint);
+    h = LongHashingUtils.addToHash(h, s.grpcStatusCode);
+    s.keyHash = h;
   }
 }
diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/AggregateMetricTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/AggregateMetricTest.groovy
deleted file mode 100644
index 140149d8324..00000000000
--- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/AggregateMetricTest.groovy
+++ /dev/null
@@ -1,105 +0,0 @@
-package datadog.trace.common.metrics
-
-import datadog.metrics.agent.AgentMeter
-import datadog.metrics.impl.DDSketchHistograms
-import datadog.metrics.impl.MonitoringImpl
-import datadog.metrics.api.statsd.StatsDClient
-import datadog.trace.test.util.DDSpecification
-
-import java.util.concurrent.TimeUnit
-import java.util.concurrent.atomic.AtomicLongArray
-
-import static datadog.trace.common.metrics.AggregateMetric.ERROR_TAG
-import static datadog.trace.common.metrics.AggregateMetric.TOP_LEVEL_TAG
-
-class AggregateMetricTest extends DDSpecification {
-
-  def setupSpec() {
-    // Initialize AgentMeter with monitoring - this is the standard mechanism used in production
-    def monitoring = new MonitoringImpl(StatsDClient.NO_OP, 1, TimeUnit.SECONDS)
-    AgentMeter.registerIfAbsent(StatsDClient.NO_OP, monitoring, DDSketchHistograms.FACTORY)
-    // Create a timer to trigger DDSketchHistograms loading and Factory registration
-    // This simulates what happens during CoreTracer initialization (traceWriteTimer)
-    monitoring.newTimer("test.init")
-  }
-
-  def "record durations sums up to total"() {
-    given:
-    AggregateMetric aggregate = new AggregateMetric()
-    when:
-    aggregate.recordDurations(3, new AtomicLongArray(1, 2, 3))
-    then:
-    aggregate.getDuration() == 6
-  }
-
-  def "total durations include errors"() {
-    given:
-    AggregateMetric aggregate = new AggregateMetric()
-    when:
-    aggregate.recordDurations(3, new AtomicLongArray(1, 2, 3))
-    then:
-    aggregate.getDuration() == 6
-  }
-
-  def "clear"() {
-    given:
-    AggregateMetric aggregate = new AggregateMetric()
-      .recordDurations(3, new AtomicLongArray(5, ERROR_TAG | 6, TOP_LEVEL_TAG | 7))
-    when:
-    aggregate.clear()
-    then:
-    aggregate.getDuration() == 0
-    aggregate.getErrorCount() == 0
-    aggregate.getTopLevelCount() == 0
-    aggregate.getHitCount() == 0
-  }
-
-  def "recordOneDuration accumulates ok and error and top-level"() {
-    given:
-    AggregateMetric aggregate = new AggregateMetric()
-      .recordOneDuration(10L)
-      .recordOneDuration(10L | TOP_LEVEL_TAG)
-      .recordOneDuration(10L | ERROR_TAG)
-
-    expect:
-    aggregate.getHitCount() == 3
-    aggregate.getDuration() == 30
-    aggregate.getErrorCount() == 1
-    aggregate.getTopLevelCount() == 1
-  }
-
-  def "ignore trailing zeros"() {
-    given:
-    AggregateMetric aggregate = new AggregateMetric()
-    when:
-    aggregate.recordDurations(3, new AtomicLongArray(1, 2, 3, 0, 0, 0))
-    then:
-    aggregate.getDuration() == 6
-    aggregate.getHitCount() == 3
-    aggregate.getErrorCount() == 0
-  }
-
-  def "hit count includes errors"() {
-    given:
-    AggregateMetric aggregate = new AggregateMetric()
-    when:
-    aggregate.recordDurations(3, new AtomicLongArray(1, 2, 3 | ERROR_TAG))
-    then:
-    aggregate.getHitCount() == 3
-    aggregate.getErrorCount() == 1
-  }
-
-  def "ok and error durations tracked separately"() {
-    given:
-    AggregateMetric aggregate = new AggregateMetric()
-    when:
-    aggregate.recordDurations(10,
-      new AtomicLongArray(1, 100 | ERROR_TAG, 2, 99 | ERROR_TAG, 3,
-      98  | ERROR_TAG, 4, 97  | ERROR_TAG))
-    then:
-    def errorLatencies = aggregate.getErrorLatencies()
-    def okLatencies = aggregate.getOkLatencies()
-    errorLatencies.getMaxValue() >= 99
-    okLatencies.getMaxValue() <= 5
-  }
-}
diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy
index a95f6bcbdbc..193d46b18bd 100644
--- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy
+++ b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy
@@ -27,7 +27,12 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
   @Shared
   long reportingInterval = 1
   @Shared
-  int queueSize = 256
+  // The CSS overclaim path claims traceSize slots per publish (not just the eligible count),
+  // so tests with multi-span traces and tight ring sizes can spuriously hit capacity. 1024 leaves
+  // plenty of headroom for the largest test traces (~10 spans) across hundreds of iterations.
+  // ConflatingMetricsAggregatorInboxFullTest deliberately keeps a tiny ring to exercise full-ring
+  // behaviour.
+  int queueSize = 1024
 
   def "should ignore traces with no measured spans"() {
     setup:
@@ -119,23 +124,25 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     latchTriggered
     1 * writer.startBucket(1, _, _)
-    1 * writer.add(new MetricKey(
-      null,
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "baz",
-      [],
-      null,
-      null,
-      null
-      ), _) >> { MetricKey key, AggregateMetric value ->
-        value.getHitCount() == 1 && value.getTopLevelCount() == 1 && value.getDuration() == 100
-      }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        null,
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "baz",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 1 && e.getDuration() == 100
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -165,23 +172,25 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     latchTriggered
     1 * writer.startBucket(1, _, _)
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "baz",
-      [],
-      null,
-      null,
-      null
-      ), _) >> { MetricKey key, AggregateMetric value ->
-        value.getHitCount() == 1 && value.getTopLevelCount() == 1 && value.getDuration() == 100
-      }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "baz",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 1 && e.getDuration() == 100
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -216,24 +225,26 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     latchTriggered == statsComputed
     (statsComputed ? 1 : 0) * writer.startBucket(1, _, _)
-    (statsComputed ? 1 : 0) * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      kind,
-      [],
-      httpMethod,
-      httpEndpoint,
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 0 && aggregateMetric.getDuration() == 100
-      })
+    (statsComputed ? 1 : 0) * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        kind,
+        [],
+        httpMethod,
+        httpEndpoint,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 0 && e.getDuration() == 100
+    }
     (statsComputed ? 1 : 0) * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -293,42 +304,46 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     cycle1Triggered
     cycle2Triggered
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "client",
-      [UTF8BytesString.create("country:france")],
-      null,
-      null,
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 0 && aggregateMetric.getDuration() == 100
-      })
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "client",
-      [UTF8BytesString.create("country:france"), UTF8BytesString.create("georegion:europe")],
-      null,
-      null,
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 0 && aggregateMetric.getDuration() == 100
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "client",
+        [UTF8BytesString.create("country:france")],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 0 && e.getDuration() == 100
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "client",
+        [UTF8BytesString.create("country:france"), UTF8BytesString.create("georegion:europe")],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 0 && e.getDuration() == 100
+    }
     2 * writer.finishBucket() >> { latch1.countDown(); latch2.countDown() }
 
     cleanup:
@@ -358,24 +373,26 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     latchTriggered
     1 * writer.startBucket(1, _, _)
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      kind,
-      expectedPeerTags,
-      null,
-      null,
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 0 && aggregateMetric.getDuration() == 100
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        kind,
+        expectedPeerTags,
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 0 && e.getDuration() == 100
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -411,23 +428,25 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     latchTriggered
     1 * writer.startBucket(1, _, _)
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "baz",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getTopLevelCount() == topLevelCount && value.getDuration() == 100
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "baz",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == topLevelCount && e.getDuration() == 100
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -470,40 +489,44 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     latchTriggered
     1 * writer.finishBucket() >> { latch.countDown() }
     1 * writer.startBucket(2, _, SECONDS.toNanos(reportingInterval))
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "baz",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == count && value.getDuration() == count * duration
-      })
-    1 * writer.add(new MetricKey(
-      "resource2",
-      "service2",
-      "operation2",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "baz",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == count && value.getDuration() == count * duration * 2
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "baz",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == count && e.getDuration() == count * duration
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource2",
+        "service2",
+        "operation2",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "baz",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == count && e.getDuration() == count * duration * 2
+    }
 
     cleanup:
     aggregator.close()
@@ -541,23 +564,25 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "should aggregate into single metric"
     latchTriggered
     1 * writer.startBucket(1, _, _)
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == count && value.getDuration() == count * duration
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == count && e.getDuration() == count * duration
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     when: "publish spans with different endpoints"
@@ -582,57 +607,63 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "should create separate metrics for each endpoint/method combination"
     latchTriggered2
     1 * writer.startBucket(3, _, _)
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration
-      })
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/orders/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration * 2
-      })
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      "POST",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration * 3
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/orders/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration * 2
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        "POST",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration * 3
+    }
     1 * writer.finishBucket() >> { latch2.countDown() }
 
     cleanup:
@@ -680,74 +711,82 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "should create 4 separate metrics"
     latchTriggered
     1 * writer.startBucket(4, _, _)
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration
-      })
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      "POST",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration * 2
-      })
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      404,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration * 3
-      })
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/orders/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration * 4
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        "POST",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration * 2
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        404,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration * 3
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/orders/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration * 4
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -784,40 +823,44 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "should create separate metric keys for spans with and without HTTP tags"
     latchTriggered
     1 * writer.startBucket(2, _, _)
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration
-      })
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration * 2
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration * 2
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -852,47 +895,54 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "should create the different metric keys for spans with and without sources"
     latchTriggered
     1 * writer.startBucket(2, _, _)
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      "source",
-      "type",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 2 && value.getDuration() == 2 * duration
-      })
-    1 * writer.add(new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric value ->
-        value.getHitCount() == 1 && value.getDuration() == duration
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        "source",
+        "type",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 2 && e.getDuration() == 2 * duration
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getDuration() == duration
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
     aggregator.close()
   }
 
-  def "test least recently written to aggregate flushed when size limit exceeded"() {
+  def "new aggregates beyond size limit are dropped when no stale entries can be evicted"() {
+    // The table only evicts entries with hitCount == 0 to make room. When all entries are live
+    // (all have been recorded against), an over-cap insert drops the new key rather than evicting
+    // an established one. This protects the data we've already collected from a burst of new keys.
     setup:
     int maxAggregates = 10
     MetricWriter writer = Mock(MetricWriter)
@@ -916,11 +966,11 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     aggregator.report()
     def latchTriggered = latch.await(2, SECONDS)
 
-    then: "the first aggregate should be dropped but the rest reported"
+    then: "the established service0..service9 are reported; service10 is dropped"
     latchTriggered
     1 * writer.startBucket(10, _, SECONDS.toNanos(reportingInterval))
-    for (int i = 1; i < 11; ++i) {
-      1 * writer.add(new MetricKey(
+    for (int i = 0; i < 10; ++i) {
+      def expected = AggregateEntryTestUtils.of(
         "resource",
         "service" + i,
         "operation",
@@ -933,26 +983,28 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
         [],
         null,
         null,
-        null
-        ), _) >> { MetricKey key, AggregateMetric value ->
-          value.getHitCount() == 1 && value.getDuration() == duration
-        }
+        null)
+      1 * writer.add({ AggregateEntryTestUtils.equals(it, expected) }) >> { AggregateEntry e ->
+        assert e.getHitCount() == 1 && e.getDuration() == duration
+      }
     }
-    0 * writer.add(new MetricKey(
-      "resource",
-      "service0",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "baz",
-      [],
-      null,
-      null,
-      null
-      ), _)
+    0 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service10",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "baz",
+        [],
+        null,
+        null,
+        null
+        ))
+    })
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -1067,7 +1119,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     latchTriggered
     1 * writer.startBucket(5, _, SECONDS.toNanos(reportingInterval))
     for (int i = 0; i < 5; ++i) {
-      1 * writer.add(new MetricKey(
+      def expected = AggregateEntryTestUtils.of(
         "resource",
         "service" + i,
         "operation",
@@ -1080,10 +1132,10 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
         [],
         null,
         null,
-        null
-        ), { AggregateMetric value ->
-          value.getHitCount() == 1 && value.getDuration() == duration
-        })
+        null)
+      1 * writer.add({ AggregateEntryTestUtils.equals(it, expected) }) >> { AggregateEntry e ->
+        assert e.getHitCount() == 1 && e.getDuration() == duration
+      }
     }
     1 * writer.finishBucket() >> { latch.countDown() }
 
@@ -1102,7 +1154,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     latchTriggered
     1 * writer.startBucket(4, _, SECONDS.toNanos(reportingInterval))
     for (int i = 1; i < 5; ++i) {
-      1 * writer.add(new MetricKey(
+      def expected = AggregateEntryTestUtils.of(
         "resource",
         "service" + i,
         "operation",
@@ -1115,26 +1167,28 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
         [],
         null,
         null,
-        null
-        ), { AggregateMetric value ->
-          value.getHitCount() == 1 && value.getDuration() == duration
-        })
+        null)
+      1 * writer.add({ AggregateEntryTestUtils.equals(it, expected) }) >> { AggregateEntry e ->
+        assert e.getHitCount() == 1 && e.getDuration() == duration
+      }
     }
-    0 * writer.add(new MetricKey(
-      "resource",
-      "service0",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "baz",
-      [],
-      null,
-      null,
-      null
-      ), _)
+    0 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "resource",
+        "service0",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "baz",
+        [],
+        null,
+        null,
+        null
+        ))
+    })
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -1169,7 +1223,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     latchTriggered
     1 * writer.startBucket(5, _, SECONDS.toNanos(reportingInterval))
     for (int i = 0; i < 5; ++i) {
-      1 * writer.add(new MetricKey(
+      def expected = AggregateEntryTestUtils.of(
         "resource",
         "service" + i,
         "operation",
@@ -1182,10 +1236,10 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
         [],
         null,
         null,
-        null
-        ), { AggregateMetric value ->
-          value.getHitCount() == 1 && value.getDuration() == duration
-        })
+        null)
+      1 * writer.add({ AggregateEntryTestUtils.equals(it, expected) }) >> { AggregateEntry e ->
+        assert e.getHitCount() == 1 && e.getDuration() == duration
+      }
     }
     1 * writer.finishBucket() >> { latch.countDown() }
 
@@ -1195,7 +1249,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "aggregate not updated in cycle is not reported"
     0 * writer.finishBucket()
     0 * writer.startBucket(_, _, _)
-    0 * writer.add(_, _)
+    0 * writer.add(_)
 
     cleanup:
     aggregator.close()
@@ -1228,7 +1282,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     latchTriggered
     1 * writer.startBucket(5, _, SECONDS.toNanos(1))
     for (int i = 0; i < 5; ++i) {
-      1 * writer.add(new MetricKey(
+      def expected = AggregateEntryTestUtils.of(
         "resource",
         "service" + i,
         "operation",
@@ -1241,10 +1295,10 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
         [],
         null,
         null,
-        null
-        ), { AggregateMetric value ->
-          value.getHitCount() == 1 && value.getDuration() == duration
-        })
+        null)
+      1 * writer.add({ AggregateEntryTestUtils.equals(it, expected) }) >> { AggregateEntry e ->
+        assert e.getHitCount() == 1 && e.getDuration() == duration
+      }
     }
     1 * writer.finishBucket() >> { latch.countDown() }
 
@@ -1394,24 +1448,26 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     latchTriggered
     1 * writer.startBucket(1, _, _)
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      true,
-      "",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 1 && aggregateMetric.getDuration() == 100
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        true,
+        "",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 1 && e.getDuration() == 100
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -1449,24 +1505,26 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "all spans should go to the same bucket (httpMethod and httpEndpoint are ignored)"
     latchTriggered
     1 * writer.startBucket(1, _, _)
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 3 && aggregateMetric.getTopLevelCount() == 3 && aggregateMetric.getDuration() == 450
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 3 && e.getTopLevelCount() == 3 && e.getDuration() == 450
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -1504,60 +1562,66 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then: "spans should go to separate buckets based on httpMethod and httpEndpoint"
     latchTriggered
     1 * writer.startBucket(3, _, _)
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      "GET",
-      "/api/users/:id",
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 1 && aggregateMetric.getDuration() == 100
-      })
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      "POST",
-      "/api/orders",
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 1 && aggregateMetric.getDuration() == 200
-      })
-    1 * writer.add(
-      new MetricKey(
-      "resource",
-      "service",
-      "operation",
-      null,
-      "type",
-      HTTP_OK,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      null
-      ), { AggregateMetric aggregateMetric ->
-        aggregateMetric.getHitCount() == 1 && aggregateMetric.getTopLevelCount() == 1 && aggregateMetric.getDuration() == 150
-      })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        "GET",
+        "/api/users/:id",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 1 && e.getDuration() == 100
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        "POST",
+        "/api/orders",
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 1 && e.getDuration() == 200
+    }
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,
+        AggregateEntryTestUtils.of(
+        "resource",
+        "service",
+        "operation",
+        null,
+        "type",
+        HTTP_OK,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        null
+        ))
+    }) >> { AggregateEntry e ->
+      assert e.getHitCount() == 1 && e.getTopLevelCount() == 1 && e.getDuration() == 150
+    }
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -1593,51 +1657,57 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
     then:
     latchTriggered
     1 * writer.startBucket(3, _, _)
-    1 * writer.add(new MetricKey(
-      "grpc.service/Method",
-      "service",
-      "grpc.server",
-      null,
-      "rpc",
-      0,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      "0"
-      ), _)
-    1 * writer.add(new MetricKey(
-      "grpc.service/Method",
-      "service",
-      "grpc.server",
-      null,
-      "rpc",
-      0,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      "5"
-      ), _)
-    1 * writer.add(new MetricKey(
-      "GET /api",
-      "service",
-      "http.request",
-      null,
-      "web",
-      200,
-      false,
-      false,
-      "server",
-      [],
-      null,
-      null,
-      null
-      ), _)
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "grpc.service/Method",
+        "service",
+        "grpc.server",
+        null,
+        "rpc",
+        0,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        "0"
+        ))
+    })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "grpc.service/Method",
+        "service",
+        "grpc.server",
+        null,
+        "rpc",
+        0,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        "5"
+        ))
+    })
+    1 * writer.add({
+      AggregateEntryTestUtils.equals(it,AggregateEntryTestUtils.of(
+        "GET /api",
+        "service",
+        "http.request",
+        null,
+        "web",
+        200,
+        false,
+        false,
+        "server",
+        [],
+        null,
+        null,
+        null
+        ))
+    })
     1 * writer.finishBucket() >> { latch.countDown() }
 
     cleanup:
@@ -1653,7 +1723,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
 
   def waitUntilEmpty(ConflatingMetricsAggregator aggregator) {
     int i = 0
-    while (!aggregator.inbox.isEmpty() && i++ < 100) {
+    while ((!aggregator.dataInbox.isEmpty() || !aggregator.signalInbox.isEmpty()) && i++ < 100) {
       Thread.sleep(10)
     }
   }
diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy
index 3ff81de9851..cc0880bc30a 100644
--- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy
+++ b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/SerializingMetricWriterTest.groovy
@@ -7,7 +7,6 @@ import static java.util.concurrent.TimeUnit.SECONDS
 import datadog.metrics.api.Histograms
 import datadog.metrics.impl.DDSketchHistograms
 import datadog.trace.api.Config
-import datadog.trace.api.Pair
 import datadog.trace.api.ProcessTags
 import datadog.trace.api.WellKnownTags
 import datadog.trace.api.git.CommitInfo
@@ -16,7 +15,6 @@ import datadog.trace.api.git.GitInfoProvider
 import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString
 import datadog.trace.test.util.DDSpecification
 import java.nio.ByteBuffer
-import java.util.concurrent.atomic.AtomicLongArray
 import org.msgpack.core.MessagePack
 import org.msgpack.core.MessageUnpacker
 
@@ -26,6 +24,30 @@ class SerializingMetricWriterTest extends DDSpecification {
     Histograms.register(DDSketchHistograms.FACTORY)
   }
 
+  /** Build an {@link AggregateEntry} with a pre-recorded duration count. */
+  private static AggregateEntry entry(
+    CharSequence resource,
+    CharSequence service,
+    CharSequence operationName,
+    CharSequence serviceSource,
+    CharSequence type,
+    int httpStatusCode,
+    boolean synthetic,
+    boolean traceRoot,
+    CharSequence spanKind,
+    List<UTF8BytesString> peerTags,
+    CharSequence httpMethod,
+    CharSequence httpEndpoint,
+    CharSequence grpcStatusCode,
+    int hitCount) {
+    AggregateEntry e = AggregateEntryTestUtils.of(
+      resource, service, operationName, serviceSource, type,
+      httpStatusCode, synthetic, traceRoot, spanKind, peerTags,
+      httpMethod, httpEndpoint, grpcStatusCode)
+    hitCount.times { e.recordOneDuration(1L) }
+    return e
+  }
+
   def "should produce correct message #iterationIndex with process tags enabled #withProcessTags" () {
     setup:
     if (!withProcessTags) {
@@ -40,8 +62,8 @@ class SerializingMetricWriterTest extends DDSpecification {
 
     when:
     writer.startBucket(content.size(), startTime, duration)
-    for (Pair<MetricKey, AggregateMetric> pair : content) {
-      writer.add(pair.getLeft(), pair.getRight())
+    for (AggregateEntry e : content) {
+      writer.add(e)
     }
     writer.finishBucket()
 
@@ -55,88 +77,40 @@ class SerializingMetricWriterTest extends DDSpecification {
     where:
     content << [
       [
-        Pair.of(
-        new MetricKey(
-        "resource1",
-        "service1",
-        "operation1",
-        null,
-        "type",
-        0,
-        false,
-        false,
-        "client",
+        entry(
+        "resource1", "service1", "operation1", null, "type", 0,
+        false, false, "client",
         [
           UTF8BytesString.create("country:canada"),
           UTF8BytesString.create("georegion:amer"),
           UTF8BytesString.create("peer.service:remote-service")
         ],
-        null,
-        null,
-        null
-        ),
-        new AggregateMetric().recordDurations(10, new AtomicLongArray(1L))
-        ),
-        Pair.of(
-        new MetricKey(
-        "resource2",
-        "service2",
-        "operation2",
-        null,
-        "type2",
-        200,
-        true,
-        false,
-        "producer",
+        null, null, null,
+        10),
+        entry(
+        "resource2", "service2", "operation2", null, "type2", 200,
+        true, false, "producer",
         [
           UTF8BytesString.create("country:canada"),
           UTF8BytesString.create("georegion:amer"),
           UTF8BytesString.create("peer.service:remote-service")
         ],
-        null,
-        null,
-        null
-        ),
-        new AggregateMetric().recordDurations(9, new AtomicLongArray(1L))
-        ),
-        Pair.of(
-        new MetricKey(
-        "GET /api/users/:id",
-        "web-service",
-        "http.request",
-        null,
-        "web",
-        200,
-        false,
-        true,
-        "server",
+        null, null, null,
+        9),
+        entry(
+        "GET /api/users/:id", "web-service", "http.request", null, "web", 200,
+        false, true, "server",
         [],
-        "GET",
-        "/api/users/:id",
-        null
-        ),
-        new AggregateMetric().recordDurations(5, new AtomicLongArray(1L))
-        )
+        null, null, null,
+        5)
       ],
       (0..10000).collect({ i ->
-        Pair.of(
-          new MetricKey(
-          "resource" + i,
-          "service" + i,
-          "operation" + i,
-          null,
-          "type",
-          0,
-          false,
-          false,
-          "producer",
+        entry(
+          "resource" + i, "service" + i, "operation" + i, null, "type", 0,
+          false, false, "producer",
           [UTF8BytesString.create("messaging.destination:dest" + i)],
-          null,
-          null,
-          null
-          ),
-          new AggregateMetric().recordDurations(10, new AtomicLongArray(1L))
-          )
+          null, null, null,
+          10)
       })
     ]
     withProcessTags << [true, false]
@@ -148,22 +122,18 @@ class SerializingMetricWriterTest extends DDSpecification {
     long duration = SECONDS.toNanos(10)
     WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language")
 
-    // Create keys with different combinations of HTTP fields
-    def keyWithNoSource = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null)
-    def keyWithSource = new MetricKey("resource", "service", "operation", "source", "type", 200, false, false, "server", [], "POST", null, null)
+    def entryNoSource = entry("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null, 1)
+    def entryWithSource = entry("resource", "service", "operation", "source", "type", 200, false, false, "server", [], "POST", null, null, 1)
 
-    def content = [
-      Pair.of(keyWithNoSource, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithSource, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-    ]
+    def content = [entryNoSource, entryWithSource]
 
     ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content)
     SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128)
 
     when:
     writer.startBucket(content.size(), startTime, duration)
-    for (Pair<MetricKey, AggregateMetric> pair : content) {
-      writer.add(pair.getLeft(), pair.getRight())
+    for (AggregateEntry e : content) {
+      writer.add(e)
     }
     writer.finishBucket()
 
@@ -177,34 +147,25 @@ class SerializingMetricWriterTest extends DDSpecification {
     long duration = SECONDS.toNanos(10)
     WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language")
 
-    // Create keys with different combinations of HTTP fields
-    def keyWithBoth = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null)
-    def keyWithMethodOnly = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], "POST", null,null)
-    def keyWithEndpointOnly = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], null, "/api/orders",null)
-    def keyWithNeither = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "client", [], null, null, null)
-
-    def content = [
-      Pair.of(keyWithBoth, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithMethodOnly, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithEndpointOnly, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithNeither, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L)))
-    ]
+    def entryWithBoth = entry("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null, 1)
+    def entryWithMethodOnly = entry("resource", "service", "operation", null, "type", 200, false, false, "server", [], "POST", null, null, 1)
+    def entryWithEndpointOnly = entry("resource", "service", "operation", null, "type", 200, false, false, "server", [], null, "/api/orders", null, 1)
+    def entryWithNeither = entry("resource", "service", "operation", null, "type", 200, false, false, "client", [], null, null, null, 1)
+
+    def content = [entryWithBoth, entryWithMethodOnly, entryWithEndpointOnly, entryWithNeither]
 
     ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content)
     SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128)
 
     when:
     writer.startBucket(content.size(), startTime, duration)
-    for (Pair<MetricKey, AggregateMetric> pair : content) {
-      writer.add(pair.getLeft(), pair.getRight())
+    for (AggregateEntry e : content) {
+      writer.add(e)
     }
     writer.finishBucket()
 
     then:
     sink.validatedInput()
-    // Test passes if validation in ValidatingSink succeeds
-    // ValidatingSink verifies that map size matches actual number of fields
-    // and that HTTPMethod/HTTPEndpoint are only present when non-empty
   }
 
   def "add git sha commit info when sha commit is #shaCommit"() {
@@ -216,40 +177,63 @@ class SerializingMetricWriterTest extends DDSpecification {
     long duration = SECONDS.toNanos(10)
     WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language")
 
-    // Create keys with different combinations of HTTP fields
-    def key = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null)
+    def e = entry("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null, 1)
 
-    def content = [Pair.of(key, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),]
+    def content = [e]
 
     ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content)
     SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128, gitInfoProvider)
 
     when:
-
     writer.startBucket(content.size(), startTime, duration)
-    for (Pair<MetricKey, AggregateMetric> pair : content) {
-      writer.add(pair.getLeft(), pair.getRight())
+    for (AggregateEntry entryItem : content) {
+      writer.add(entryItem)
     }
     writer.finishBucket()
 
     then:
-
     sink.validatedInput()
 
     where:
     shaCommit << [null, "123456"]
   }
 
+  def "GRPCStatusCode field is present in payload for rpc-type spans"() {
+    setup:
+    long startTime = MILLISECONDS.toNanos(System.currentTimeMillis())
+    long duration = SECONDS.toNanos(10)
+    WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language")
+
+    def entryWithGrpc = entry("grpc.service/Method", "grpc-service", "grpc.server", null, "rpc", 0, false, false, "server", [], null, null, "OK", 1)
+    def entryWithGrpcError = entry("grpc.service/Method", "grpc-service", "grpc.server", null, "rpc", 0, false, false, "client", [], null, null, "NOT_FOUND", 1)
+    def entryWithoutGrpc = entry("resource", "service", "operation", null, "web", 200, false, false, "server", [], null, null, null, 1)
+
+    def content = [entryWithGrpc, entryWithGrpcError, entryWithoutGrpc]
+
+    ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content)
+    SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128)
+
+    when:
+    writer.startBucket(content.size(), startTime, duration)
+    for (AggregateEntry e : content) {
+      writer.add(e)
+    }
+    writer.finishBucket()
+
+    then:
+    sink.validatedInput()
+  }
+
   static class ValidatingSink implements Sink {
 
     private final WellKnownTags wellKnownTags
     private final long startTimeNanos
     private final long duration
     private boolean validated = false
-    private List<Pair<MetricKey, AggregateMetric>> content
+    private List<AggregateEntry> content
 
     ValidatingSink(WellKnownTags wellKnownTags, long startTimeNanos, long duration,
-    List<Pair<MetricKey, AggregateMetric>> content) {
+    List<AggregateEntry> content) {
       this.wellKnownTags = wellKnownTags
       this.startTimeNanos = startTimeNanos
       this.duration = duration
@@ -298,83 +282,81 @@ class SerializingMetricWriterTest extends DDSpecification {
       assert unpacker.unpackString() == "Stats"
       int statCount = unpacker.unpackArrayHeader()
       assert statCount == content.size()
-      for (Pair<MetricKey, AggregateMetric> pair : content) {
-        MetricKey key = pair.getLeft()
-        AggregateMetric value = pair.getRight()
+      for (AggregateEntry entry : content) {
         int metricMapSize = unpacker.unpackMapHeader()
         // Calculate expected map size based on optional fields
-        boolean hasHttpMethod = key.getHttpMethod() != null
-        boolean hasHttpEndpoint = key.getHttpEndpoint() != null
-        boolean hasServiceSource = key.getServiceSource() != null
-        boolean hasGrpcStatusCode = key.getGrpcStatusCode() != null
+        boolean hasHttpMethod = entry.getHttpMethod() != null
+        boolean hasHttpEndpoint = entry.getHttpEndpoint() != null
+        boolean hasServiceSource = entry.getServiceSource() != null
+        boolean hasGrpcStatusCode = entry.getGrpcStatusCode() != null
         int expectedMapSize = 15 + (hasServiceSource ? 1 : 0) + (hasHttpMethod ? 1 : 0) + (hasHttpEndpoint ? 1 : 0) + (hasGrpcStatusCode ? 1 : 0)
         assert metricMapSize == expectedMapSize
         int elementCount = 0
         assert unpacker.unpackString() == "Name"
-        assert unpacker.unpackString() == key.getOperationName() as String
+        assert unpacker.unpackString() == entry.getOperationName() as String
         ++elementCount
         assert unpacker.unpackString() == "Service"
-        assert unpacker.unpackString() == key.getService() as String
+        assert unpacker.unpackString() == entry.getService() as String
         ++elementCount
         assert unpacker.unpackString() == "Resource"
-        assert unpacker.unpackString() == key.getResource() as String
+        assert unpacker.unpackString() == entry.getResource() as String
         ++elementCount
         assert unpacker.unpackString() == "Type"
-        assert unpacker.unpackString() == key.getType() as String
+        assert unpacker.unpackString() == entry.getType() as String
         ++elementCount
         assert unpacker.unpackString() == "HTTPStatusCode"
-        assert unpacker.unpackInt() == key.getHttpStatusCode()
+        assert unpacker.unpackInt() == entry.getHttpStatusCode()
         ++elementCount
         assert unpacker.unpackString() == "Synthetics"
-        assert unpacker.unpackBoolean() == key.isSynthetics()
+        assert unpacker.unpackBoolean() == entry.isSynthetics()
         ++elementCount
         assert unpacker.unpackString() == "IsTraceRoot"
-        assert unpacker.unpackInt() == (key.isTraceRoot() ? TriState.TRUE.serialValue : TriState.FALSE.serialValue)
+        assert unpacker.unpackInt() == (entry.isTraceRoot() ? TriState.TRUE.serialValue : TriState.FALSE.serialValue)
         ++elementCount
         assert unpacker.unpackString() == "SpanKind"
-        assert unpacker.unpackString() == key.getSpanKind() as String
+        assert unpacker.unpackString() == entry.getSpanKind() as String
         ++elementCount
         assert unpacker.unpackString() == "PeerTags"
         int peerTagsLength = unpacker.unpackArrayHeader()
-        assert peerTagsLength == key.getPeerTags().size()
+        assert peerTagsLength == entry.getPeerTags().size()
         for (int i = 0; i < peerTagsLength; i++) {
           def unpackedPeerTag = unpacker.unpackString()
-          assert unpackedPeerTag == key.getPeerTags()[i].toString()
+          assert unpackedPeerTag == entry.getPeerTags()[i].toString()
         }
         ++elementCount
         // Service source is only present when the service name has been overridden by the tracer
         if (hasServiceSource) {
           assert unpacker.unpackString() == "srv_src"
-          assert unpacker.unpackString() == key.getServiceSource().toString()
+          assert unpacker.unpackString() == entry.getServiceSource().toString()
           ++elementCount
         }
         // HTTPMethod and HTTPEndpoint are optional - only present if non-null
         if (hasHttpMethod) {
           assert unpacker.unpackString() == "HTTPMethod"
-          assert unpacker.unpackString() == key.getHttpMethod() as String
+          assert unpacker.unpackString() == entry.getHttpMethod() as String
           ++elementCount
         }
         if (hasHttpEndpoint) {
           assert unpacker.unpackString() == "HTTPEndpoint"
-          assert unpacker.unpackString() == key.getHttpEndpoint() as String
+          assert unpacker.unpackString() == entry.getHttpEndpoint() as String
           ++elementCount
         }
         if (hasGrpcStatusCode) {
           assert unpacker.unpackString() == "GRPCStatusCode"
-          assert unpacker.unpackString() == key.getGrpcStatusCode() as String
+          assert unpacker.unpackString() == entry.getGrpcStatusCode() as String
           ++elementCount
         }
         assert unpacker.unpackString() == "Hits"
-        assert unpacker.unpackInt() == value.getHitCount()
+        assert unpacker.unpackInt() == entry.getHitCount()
         ++elementCount
         assert unpacker.unpackString() == "Errors"
-        assert unpacker.unpackInt() == value.getErrorCount()
+        assert unpacker.unpackInt() == entry.getErrorCount()
         ++elementCount
         assert unpacker.unpackString() == "TopLevelHits"
-        assert unpacker.unpackInt() == value.getTopLevelCount()
+        assert unpacker.unpackInt() == entry.getTopLevelCount()
         ++elementCount
         assert unpacker.unpackString() == "Duration"
-        assert unpacker.unpackLong() == value.getDuration()
+        assert unpacker.unpackLong() == entry.getDuration()
         ++elementCount
         assert unpacker.unpackString() == "OkSummary"
         validateSketch(unpacker)
@@ -397,99 +379,4 @@ class SerializingMetricWriterTest extends DDSpecification {
       return validated
     }
   }
-
-  def "ServiceSource optional in the payload"() {
-    setup:
-    long startTime = MILLISECONDS.toNanos(System.currentTimeMillis())
-    long duration = SECONDS.toNanos(10)
-    WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language")
-
-    // Create keys with different combinations of HTTP fields
-    def keyWithNoSource = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null)
-    def keyWithSource = new MetricKey("resource", "service", "operation", "source", "type", 200, false, false, "server", [], "POST", null, null)
-
-    def content = [
-      Pair.of(keyWithNoSource, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithSource, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-    ]
-
-    ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content)
-    SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128)
-
-    when:
-    writer.startBucket(content.size(), startTime, duration)
-    for (Pair<MetricKey, AggregateMetric> pair : content) {
-      writer.add(pair.getLeft(), pair.getRight())
-    }
-    writer.finishBucket()
-
-    then:
-    sink.validatedInput()
-  }
-
-  def "GRPCStatusCode field is present in payload for rpc-type spans"() {
-    setup:
-    long startTime = MILLISECONDS.toNanos(System.currentTimeMillis())
-    long duration = SECONDS.toNanos(10)
-    WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language")
-
-    def keyWithGrpc = new MetricKey("grpc.service/Method", "grpc-service", "grpc.server", null, "rpc", 0, false, false, "server", [], null, null, "OK")
-    def keyWithGrpcError = new MetricKey("grpc.service/Method", "grpc-service", "grpc.server", null, "rpc", 0, false, false, "client", [], null, null, "NOT_FOUND")
-    def keyWithoutGrpc = new MetricKey("resource", "service", "operation", null, "web", 200, false, false, "server", [], null, null, null)
-
-    def content = [
-      Pair.of(keyWithGrpc, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithGrpcError, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithoutGrpc, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L)))
-    ]
-
-    ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content)
-    SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128)
-
-    when:
-    writer.startBucket(content.size(), startTime, duration)
-    for (Pair<MetricKey, AggregateMetric> pair : content) {
-      writer.add(pair.getLeft(), pair.getRight())
-    }
-    writer.finishBucket()
-
-    then:
-    sink.validatedInput()
-  }
-
-  def "HTTPMethod and HTTPEndpoint fields are optional in payload"() {
-    setup:
-    long startTime = MILLISECONDS.toNanos(System.currentTimeMillis())
-    long duration = SECONDS.toNanos(10)
-    WellKnownTags wellKnownTags = new WellKnownTags("runtimeid", "hostname", "env", "service", "version", "language")
-
-    // Create keys with different combinations of HTTP fields
-    def keyWithBoth = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], "GET", "/api/users", null)
-    def keyWithMethodOnly = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], "POST", null, null)
-    def keyWithEndpointOnly = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "server", [], null, "/api/orders", null)
-    def keyWithNeither = new MetricKey("resource", "service", "operation", null, "type", 200, false, false, "client", [], null, null, null)
-
-    def content = [
-      Pair.of(keyWithBoth, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithMethodOnly, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithEndpointOnly, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L))),
-      Pair.of(keyWithNeither, new AggregateMetric().recordDurations(1, new AtomicLongArray(1L)))
-    ]
-
-    ValidatingSink sink = new ValidatingSink(wellKnownTags, startTime, duration, content)
-    SerializingMetricWriter writer = new SerializingMetricWriter(wellKnownTags, sink, 128)
-
-    when:
-    writer.startBucket(content.size(), startTime, duration)
-    for (Pair<MetricKey, AggregateMetric> pair : content) {
-      writer.add(pair.getLeft(), pair.getRight())
-    }
-    writer.finishBucket()
-
-    then:
-    sink.validatedInput()
-    // Test passes if validation in ValidatingSink succeeds
-    // ValidatingSink verifies that map size matches actual number of fields
-    // and that HTTPMethod/HTTPEndpoint are only present when non-empty
-  }
 }
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java
new file mode 100644
index 00000000000..b35dd4cd938
--- /dev/null
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java
@@ -0,0 +1,165 @@
+package datadog.trace.common.metrics;
+
+import static datadog.trace.common.metrics.AggregateEntry.ERROR_TAG;
+import static datadog.trace.common.metrics.AggregateEntry.TOP_LEVEL_TAG;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import datadog.metrics.agent.AgentMeter;
+import datadog.metrics.api.statsd.StatsDClient;
+import datadog.metrics.impl.DDSketchHistograms;
+import datadog.metrics.impl.MonitoringImpl;
+import java.util.concurrent.TimeUnit;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+class AggregateEntryTest {
+
+  @BeforeAll
+  static void initAgentMeter() {
+    // recordOneDuration -> Histogram.accept needs AgentMeter to be initialized.
+    MonitoringImpl monitoring = new MonitoringImpl(StatsDClient.NO_OP, 1, TimeUnit.SECONDS);
+    AgentMeter.registerIfAbsent(StatsDClient.NO_OP, monitoring, DDSketchHistograms.FACTORY);
+    monitoring.newTimer("test.init");
+  }
+
+  @Test
+  void recordOneDurationSumsToTotal() {
+    AggregateEntry entry = newEntry();
+    entry.recordOneDuration(1L);
+    entry.recordOneDuration(2L);
+    entry.recordOneDuration(3L);
+    assertEquals(6, entry.getDuration());
+  }
+
+  @Test
+  void clearResetsAllCounters() {
+    AggregateEntry entry = newEntry();
+    entry.recordOneDuration(5L);
+    entry.recordOneDuration(ERROR_TAG | 6L);
+    entry.recordOneDuration(TOP_LEVEL_TAG | 7L);
+    entry.clear();
+    assertEquals(0, entry.getDuration());
+    assertEquals(0, entry.getErrorCount());
+    assertEquals(0, entry.getTopLevelCount());
+    assertEquals(0, entry.getHitCount());
+  }
+
+  @Test
+  void recordOneDurationAccumulatesOkErrorAndTopLevel() {
+    AggregateEntry entry = newEntry();
+    entry.recordOneDuration(10L);
+    entry.recordOneDuration(10L | TOP_LEVEL_TAG);
+    entry.recordOneDuration(10L | ERROR_TAG);
+
+    assertEquals(3, entry.getHitCount());
+    assertEquals(30, entry.getDuration());
+    assertEquals(1, entry.getErrorCount());
+    assertEquals(1, entry.getTopLevelCount());
+  }
+
+  @Test
+  void hitCountIncludesErrors() {
+    AggregateEntry entry = newEntry();
+    entry.recordOneDuration(1L);
+    entry.recordOneDuration(2L);
+    entry.recordOneDuration(3L | ERROR_TAG);
+    assertEquals(3, entry.getHitCount());
+    assertEquals(1, entry.getErrorCount());
+  }
+
+  @Test
+  void okAndErrorLatenciesTrackedSeparately() {
+    AggregateEntry entry = newEntry();
+    long[] durations = {
+      1L, 100L | ERROR_TAG, 2L, 99L | ERROR_TAG, 3L, 98L | ERROR_TAG, 4L, 97L | ERROR_TAG
+    };
+    for (long d : durations) {
+      entry.recordOneDuration(d);
+    }
+    assertTrue(entry.getErrorLatencies().getMaxValue() >= 99);
+    assertTrue(entry.getOkLatencies().getMaxValue() <= 5);
+  }
+
+  @Test
+  void testUtilsEqualsIsConsistentWithHashCodeAcrossDifferentSchemaLayouts() {
+    // Contract test for AggregateEntryTestUtils (the test-side equality helper used by Spock
+    // mock matchers). Production AggregateEntry has no equals override.
+    //
+    // Two entries with identical encoded peerTags but different raw layouts must not be equal,
+    // because hashOf folds in the raw arrays. Equality on the encoded list would let them
+    // collapse while their hashCodes differ -- violating the contract.
+    //
+    //   A: schema ["a","b"], values [null,"x"] -> encoded ["b:x"]
+    //   B: schema ["b","c"], values ["x",null] -> encoded ["b:x"]
+    AggregateEntry a =
+        AggregateEntryTestUtils.forSnapshot(
+            snapshotWithPeerTags(new String[] {"a", "b"}, new String[] {null, "x"}));
+    AggregateEntry b =
+        AggregateEntryTestUtils.forSnapshot(
+            snapshotWithPeerTags(new String[] {"b", "c"}, new String[] {"x", null}));
+
+    // Sanity: same encoded peer tags, despite different raw layout.
+    assertEquals(a.getPeerTags(), b.getPeerTags());
+
+    // Different raw layouts -> entries must not be equal via the test helper.
+    assertFalse(AggregateEntryTestUtils.equals(a, b));
+    // And different hashCodes (matching the inequality).
+    assertNotEquals(AggregateEntryTestUtils.hashCode(a), AggregateEntryTestUtils.hashCode(b));
+  }
+
+  @Test
+  void testUtilsEqualEntriesHaveEqualHashCodes() {
+    AggregateEntry a =
+        AggregateEntryTestUtils.forSnapshot(
+            snapshotWithPeerTags(new String[] {"a", "b"}, new String[] {null, "x"}));
+    AggregateEntry b =
+        AggregateEntryTestUtils.forSnapshot(
+            snapshotWithPeerTags(new String[] {"a", "b"}, new String[] {null, "x"}));
+
+    assertTrue(AggregateEntryTestUtils.equals(a, b));
+    assertEquals(AggregateEntryTestUtils.hashCode(a), AggregateEntryTestUtils.hashCode(b));
+  }
+
+  private static SpanSnapshot snapshotWithPeerTags(String[] names, String[] values) {
+    return AggregateEntryTestUtils.buildSnapshot(
+        "resource",
+        "svc",
+        "op",
+        null,
+        "type",
+        (short) 200,
+        false,
+        true,
+        "client",
+        PeerTagSchema.testSchema(names),
+        values,
+        null,
+        null,
+        null,
+        0L);
+  }
+
+  private static AggregateEntry newEntry() {
+    SpanSnapshot snapshot =
+        AggregateEntryTestUtils.buildSnapshot(
+            "resource",
+            "svc",
+            "op",
+            null,
+            "type",
+            (short) 200,
+            false,
+            true,
+            "client",
+            null,
+            null,
+            null,
+            null,
+            null,
+            0L);
+    return AggregateEntryTestUtils.forSnapshot(snapshot);
+  }
+}
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java
new file mode 100644
index 00000000000..acc298fd436
--- /dev/null
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java
@@ -0,0 +1,175 @@
+package datadog.trace.common.metrics;
+
+import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nullable;
+
+/**
+ * Test-side helpers for {@link AggregateEntry}: a positional-args fixture factory plus a field-wise
+ * equality contract for use with Spock mock argument matchers and JUnit assertions. Lives in {@code
+ * src/test} so the production class stays free of test-only API; same {@code
+ * datadog.trace.common.metrics} package so this helper can reach package-private fields and
+ * constructors.
+ *
+ * <p>Production {@code AggregateEntry} intentionally has no {@code equals}/{@code hashCode}
+ * override -- {@link AggregateTable} bucketing goes through {@link AggregateEntry#matches} keyed on
+ * {@link AggregateEntry#keyHash}, and no production code path invokes {@link Object#equals}.
+ *
+ * <p>The equality helper compares the raw {@code peerTagNames}/{@code peerTagValues} arrays (not
+ * the encoded {@code peerTags} list) so it stays consistent with {@link AggregateEntry#hashOf},
+ * which folds in raw arrays via {@link PeerTagSchema#hashCode()} and {@link
+ * Arrays#hashCode(Object[])}. Comparing the encoded list would let two entries with different raw
+ * layouts (e.g. tag {@code "b"} at index 1 in schema A vs index 0 in schema B, with matching
+ * values) collapse to the same encoded form -- a real bug surfaced during PR #11382 review.
+ */
+public final class AggregateEntryTestUtils {
+  private AggregateEntryTestUtils() {}
+
+  /**
+   * Builds an {@link AggregateEntry} from the same positional shape the prior {@code new
+   * MetricKey(...)} took. Accepts a pre-encoded {@code List<UTF8BytesString>} of {@code
+   * "name:value"} peer tags and recovers the parallel-array {@code (names, values)} form by
+   * splitting on the {@code ':'} delimiter.
+   *
+   * <p><b>Test-only.</b> The split is at the <em>first</em> {@code ':'}, so peer-tag values
+   * containing a colon (URLs, IPv6 addresses, {@code service:env} patterns) will be silently
+   * misparsed and the recovered (name, value) pair will be wrong. Keep test data colon-free in
+   * peer-tag values, or wire a production-style snapshot through {@link #forSnapshot(SpanSnapshot)}
+   * directly instead.
+   */
+  public static AggregateEntry of(
+      CharSequence resource,
+      CharSequence service,
+      CharSequence operationName,
+      @Nullable CharSequence serviceSource,
+      CharSequence type,
+      int httpStatusCode,
+      boolean synthetic,
+      boolean traceRoot,
+      CharSequence spanKind,
+      @Nullable List<UTF8BytesString> peerTags,
+      @Nullable CharSequence httpMethod,
+      @Nullable CharSequence httpEndpoint,
+      @Nullable CharSequence grpcStatusCode) {
+    PeerTagSchema schema = null;
+    String[] values = null;
+    if (peerTags != null && !peerTags.isEmpty()) {
+      String[] names = new String[peerTags.size()];
+      values = new String[peerTags.size()];
+      int i = 0;
+      for (UTF8BytesString t : peerTags) {
+        String s = t.toString();
+        int colon = s.indexOf(':');
+        names[i] = colon < 0 ? s : s.substring(0, colon);
+        values[i] = colon < 0 ? "" : s.substring(colon + 1);
+        i++;
+      }
+      schema = PeerTagSchema.testSchema(names);
+    }
+    SpanSnapshot syntheticSnapshot =
+        buildSnapshot(
+            resource,
+            service,
+            operationName,
+            serviceSource,
+            type,
+            (short) httpStatusCode,
+            synthetic,
+            traceRoot,
+            spanKind,
+            schema,
+            values,
+            httpMethod,
+            httpEndpoint,
+            grpcStatusCode,
+            0L);
+    return forSnapshot(syntheticSnapshot);
+  }
+
+  /**
+   * Test-only snapshot builder. Production fillSlot canonicalizes each field via the per-field
+   * DDCaches on {@link AggregateEntry} and precomputes {@link SpanSnapshot#keyHash}; this helper
+   * does the same so tests exercise the production matches/hash path.
+   */
+  static SpanSnapshot buildSnapshot(
+      CharSequence resource,
+      CharSequence service,
+      CharSequence operationName,
+      @Nullable CharSequence serviceSource,
+      CharSequence type,
+      short httpStatusCode,
+      boolean synthetic,
+      boolean traceRoot,
+      CharSequence spanKind,
+      @Nullable PeerTagSchema peerTagSchema,
+      @Nullable String[] peerTagValues,
+      @Nullable CharSequence httpMethod,
+      @Nullable CharSequence httpEndpoint,
+      @Nullable CharSequence grpcStatusCode,
+      long tagAndDuration) {
+    SpanSnapshot s = new SpanSnapshot();
+    s.resourceName = AggregateEntry.canonicalize(AggregateEntry.RESOURCE_CACHE, resource);
+    s.serviceName = AggregateEntry.canonicalize(AggregateEntry.SERVICE_CACHE, service);
+    s.operationName = AggregateEntry.canonicalize(AggregateEntry.OPERATION_CACHE, operationName);
+    s.serviceNameSource =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.SERVICE_SOURCE_CACHE, serviceSource);
+    s.spanType = AggregateEntry.canonicalize(AggregateEntry.TYPE_CACHE, type);
+    s.httpStatusCode = httpStatusCode;
+    s.synthetic = synthetic;
+    s.traceRoot = traceRoot;
+    s.spanKind = AggregateEntry.canonicalize(AggregateEntry.SPAN_KIND_CACHE, spanKind);
+    s.peerTagSchema = peerTagSchema;
+    s.peerTagValues = peerTagValues;
+    s.httpMethod =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_METHOD_CACHE, httpMethod);
+    s.httpEndpoint =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_ENDPOINT_CACHE, httpEndpoint);
+    s.grpcStatusCode =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.GRPC_STATUS_CODE_CACHE, grpcStatusCode);
+    s.tagAndDuration = tagAndDuration;
+    SpanSnapshot.computeAndSetKeyHash(s);
+    return s;
+  }
+
+  /**
+   * Builds an {@link AggregateEntry} from {@code s}. Production callers route through {@link
+   * AggregateTable#findOrInsert}; tests use this helper for direct construction.
+   */
+  public static AggregateEntry forSnapshot(SpanSnapshot s) {
+    return new AggregateEntry(s);
+  }
+
+  /**
+   * Whether {@code a} and {@code b} carry identical label fields. Counter and histogram state is
+   * intentionally excluded -- this compares the key identity, not the aggregate.
+   */
+  public static boolean equals(AggregateEntry a, AggregateEntry b) {
+    if (a == b) return true;
+    if (a == null || b == null) return false;
+    return a.getHttpStatusCode() == b.getHttpStatusCode()
+        && a.isSynthetics() == b.isSynthetics()
+        && a.isTraceRoot() == b.isTraceRoot()
+        && Objects.equals(a.getResource(), b.getResource())
+        && Objects.equals(a.getService(), b.getService())
+        && Objects.equals(a.getOperationName(), b.getOperationName())
+        && Objects.equals(a.getServiceSource(), b.getServiceSource())
+        && Objects.equals(a.getType(), b.getType())
+        && Objects.equals(a.getSpanKind(), b.getSpanKind())
+        && Arrays.equals(a.peerTagNames, b.peerTagNames)
+        && Arrays.equals(a.peerTagValues, b.peerTagValues)
+        && Objects.equals(a.getHttpMethod(), b.getHttpMethod())
+        && Objects.equals(a.getHttpEndpoint(), b.getHttpEndpoint())
+        && Objects.equals(a.getGrpcStatusCode(), b.getGrpcStatusCode());
+  }
+
+  /**
+   * Stable hash matching {@link #equals(AggregateEntry, AggregateEntry)} -- derived from {@link
+   * AggregateEntry#keyHash}, which {@link AggregateEntry#hashOf} computes from the same raw fields
+   * the helper's {@code equals} compares.
+   */
+  public static int hashCode(AggregateEntry e) {
+    return e == null ? 0 : (int) e.keyHash;
+  }
+}
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java
new file mode 100644
index 00000000000..e807dfdd338
--- /dev/null
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java
@@ -0,0 +1,429 @@
+package datadog.trace.common.metrics;
+
+import static datadog.trace.common.metrics.AggregateEntry.ERROR_TAG;
+import static datadog.trace.common.metrics.AggregateEntry.TOP_LEVEL_TAG;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import datadog.metrics.agent.AgentMeter;
+import datadog.metrics.api.statsd.StatsDClient;
+import datadog.metrics.impl.DDSketchHistograms;
+import datadog.metrics.impl.MonitoringImpl;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+class AggregateTableTest {
+
+  @BeforeAll
+  static void initAgentMeter() {
+    // AggregateEntry.recordOneDuration -> Histogram.accept needs AgentMeter to be initialized.
+    MonitoringImpl monitoring = new MonitoringImpl(StatsDClient.NO_OP, 1, TimeUnit.SECONDS);
+    AgentMeter.registerIfAbsent(StatsDClient.NO_OP, monitoring, DDSketchHistograms.FACTORY);
+    monitoring.newTimer("test.init");
+  }
+
+  @Test
+  void insertOnMissReturnsNewAggregate() {
+    AggregateTable table = new AggregateTable(8);
+    SpanSnapshot s = snapshot("svc", "op", "client");
+
+    AggregateEntry agg = table.findOrInsert(s);
+
+    assertNotNull(agg);
+    assertEquals(1, table.size());
+    assertEquals(0, agg.getHitCount());
+  }
+
+  @Test
+  void hitReturnsSameAggregateInstance() {
+    AggregateTable table = new AggregateTable(8);
+    SpanSnapshot s1 = snapshot("svc", "op", "client");
+    SpanSnapshot s2 = snapshot("svc", "op", "client");
+
+    AggregateEntry first = table.findOrInsert(s1);
+    AggregateEntry second = table.findOrInsert(s2);
+
+    assertSame(first, second);
+    assertEquals(1, table.size());
+  }
+
+  @Test
+  void differentKindFieldsAreDistinct() {
+    AggregateTable table = new AggregateTable(8);
+
+    AggregateEntry clientAgg = table.findOrInsert(snapshot("svc", "op", "client"));
+    AggregateEntry serverAgg = table.findOrInsert(snapshot("svc", "op", "server"));
+
+    assertNotSame(clientAgg, serverAgg);
+    assertEquals(2, table.size());
+  }
+
+  @Test
+  void peerTagPairsParticipateInIdentity() {
+    AggregateTable table = new AggregateTable(8);
+    SpanSnapshot withTags =
+        builder("svc", "op", "client").peerTags("peer.hostname", "host-a").build();
+    SpanSnapshot otherTags =
+        builder("svc", "op", "client").peerTags("peer.hostname", "host-b").build();
+    SpanSnapshot noTags = builder("svc", "op", "client").build();
+
+    AggregateEntry a = table.findOrInsert(withTags);
+    AggregateEntry b = table.findOrInsert(otherTags);
+    AggregateEntry c = table.findOrInsert(noTags);
+
+    assertNotSame(a, b);
+    assertNotSame(a, c);
+    assertNotSame(b, c);
+    assertEquals(3, table.size());
+  }
+
+  @Test
+  void capOverrunEvictsStaleEntry() {
+    AggregateTable table = new AggregateTable(2);
+
+    AggregateEntry stale = table.findOrInsert(snapshot("svc-a", "op", "client"));
+    // do not record on stale -> hitCount stays at 0
+
+    AggregateEntry live = table.findOrInsert(snapshot("svc-b", "op", "client"));
+    live.recordOneDuration(10L | TOP_LEVEL_TAG); // hitCount=1, not evictable
+
+    // table is full (size=2). Inserting a third should evict the stale one and succeed.
+    AggregateEntry newcomer = table.findOrInsert(snapshot("svc-c", "op", "client"));
+    assertNotNull(newcomer);
+    assertEquals(2, table.size());
+
+    // re-inserting the stale snapshot should miss now (it was evicted) and produce a fresh entry
+    AggregateEntry staleAgain = table.findOrInsert(snapshot("svc-a", "op", "client"));
+    assertNotSame(stale, staleAgain);
+  }
+
+  @Test
+  void backToBackEvictionsAllSucceed() {
+    // Cursor amortization regression: cap the table, fill with stale entries, then force a
+    // sequence of cap-overrun inserts. Each insert must succeed (evicting one stale entry and
+    // inserting one new). The cursor field is internal, but if it were ever wedged (e.g.
+    // pointing past the end of buckets, or not advancing after a successful eviction), some
+    // later insert would fail to find a stale entry. Drives ~3x the capacity worth of inserts to
+    // give wrap-around plenty of chances to misbehave.
+    AggregateTable table = new AggregateTable(8);
+    for (int i = 0; i < 8; i++) {
+      table.findOrInsert(snapshot("init-" + i, "op", "client"));
+    }
+    for (int i = 0; i < 32; i++) {
+      AggregateEntry inserted = table.findOrInsert(snapshot("post-" + i, "op", "client"));
+      assertNotNull(
+          inserted, "insert #" + i + " should evict a stale entry and succeed (table full)");
+    }
+    assertEquals(8, table.size());
+  }
+
+  @Test
+  void clearResetsCursorForSubsequentEvictions() {
+    // The cursor must reset to 0 on clear so a re-filled table doesn't start eviction at a
+    // stale bucket index. Verified indirectly: clear and re-fill, then force an eviction; the
+    // newcomer must successfully take a slot (which only works if a stale entry was found).
+    AggregateTable table = new AggregateTable(4);
+
+    // Fill, age, evict once -- cursor lands at some non-zero bucket
+    for (int i = 0; i < 4; i++) {
+      table.findOrInsert(snapshot("warm-" + i, "op", "client"));
+    }
+    table.findOrInsert(snapshot("evict-trigger", "op", "client"));
+
+    table.clear();
+    assertEquals(0, table.size());
+
+    // Re-fill, age, force eviction -- should still find a stale entry from bucket 0 onward
+    for (int i = 0; i < 4; i++) {
+      table.findOrInsert(snapshot("fresh-" + i, "op", "client"));
+    }
+    AggregateEntry newcomer = table.findOrInsert(snapshot("post-clear", "op", "client"));
+    assertNotNull(newcomer, "post-clear cap-overrun insert must succeed via cursor-reset evict");
+  }
+
+  @Test
+  void capOverrunWithNoStaleReturnsNull() {
+    AggregateTable table = new AggregateTable(2);
+
+    AggregateEntry a = table.findOrInsert(snapshot("svc-a", "op", "client"));
+    AggregateEntry b = table.findOrInsert(snapshot("svc-b", "op", "client"));
+    a.recordOneDuration(10L);
+    b.recordOneDuration(20L);
+
+    AggregateEntry c = table.findOrInsert(snapshot("svc-c", "op", "client"));
+    assertNull(c);
+    assertEquals(2, table.size());
+  }
+
+  @Test
+  void expungeStaleAggregatesRemovesZeroHitsOnly() {
+    AggregateTable table = new AggregateTable(16);
+
+    AggregateEntry live = table.findOrInsert(snapshot("svc-live", "op", "client"));
+    live.recordOneDuration(10L);
+    AggregateEntry stale1 = table.findOrInsert(snapshot("svc-stale1", "op", "client"));
+    AggregateEntry stale2 = table.findOrInsert(snapshot("svc-stale2", "op", "client"));
+    assertEquals(3, table.size());
+    assertEquals(0, stale1.getHitCount());
+    assertEquals(0, stale2.getHitCount());
+
+    table.expungeStaleAggregates();
+
+    assertEquals(1, table.size());
+    // the live entry must still be reachable
+    assertSame(live, table.findOrInsert(snapshot("svc-live", "op", "client")));
+  }
+
+  @Test
+  void forEachVisitsEveryEntry() {
+    AggregateTable table = new AggregateTable(8);
+    table.findOrInsert(snapshot("a", "op", "client")).recordOneDuration(1L);
+    table.findOrInsert(snapshot("b", "op", "client")).recordOneDuration(2L);
+    table.findOrInsert(snapshot("c", "op", "client")).recordOneDuration(3L | ERROR_TAG);
+
+    Map<String, Long> visited = new HashMap<>();
+    table.forEach(e -> visited.put(e.getService().toString(), e.getDuration()));
+
+    assertEquals(3, visited.size());
+    assertEquals(1L, visited.get("a"));
+    assertEquals(2L, visited.get("b"));
+    assertEquals(3L, visited.get("c"));
+  }
+
+  @Test
+  void clearEmptiesTheTable() {
+    AggregateTable table = new AggregateTable(8);
+    table.findOrInsert(snapshot("a", "op", "client"));
+    table.findOrInsert(snapshot("b", "op", "client"));
+    assertEquals(2, table.size());
+
+    table.clear();
+
+    assertTrue(table.isEmpty());
+    assertEquals(0, table.size());
+    // and re-insertion works after clear
+    assertNotNull(table.findOrInsert(snapshot("a", "op", "client")));
+  }
+
+  @Test
+  void encodedLabelsAreBuiltOnInsert() {
+    AggregateTable table = new AggregateTable(4);
+    List<AggregateEntry> seen = new ArrayList<>();
+    table.findOrInsert(snapshot("svc", "op", "client"));
+    table.forEach(seen::add);
+
+    assertEquals(1, seen.size());
+    AggregateEntry e = seen.get(0);
+    assertEquals("svc", e.getService().toString());
+    assertEquals("op", e.getOperationName().toString());
+    assertEquals("client", e.getSpanKind().toString());
+  }
+
+  @Test
+  void nullAndEmptyOptionalFieldsCollapseToOneEntry() {
+    // null and length-zero are treated as equivalent for optional fields, so snapshots that
+    // differ only in null-vs-"" land on the same entry.
+    AggregateTable table = new AggregateTable(8);
+
+    SpanSnapshot snapNull = nullableSnapshot(null, null, null, null);
+    SpanSnapshot snapEmpty = nullableSnapshot("", "", "", "");
+
+    AggregateEntry first = table.findOrInsert(snapNull);
+    AggregateEntry secondNull = table.findOrInsert(nullableSnapshot(null, null, null, null));
+    AggregateEntry forEmpty = table.findOrInsert(snapEmpty);
+
+    assertSame(first, secondNull, "two null-fielded snapshots must hit the same entry");
+    assertSame(first, forEmpty, "null- and empty-fielded snapshots must hit the same entry");
+    assertEquals(1, table.size());
+  }
+
+  @Test
+  void nullServiceAndSpanKindDoNotNpeAndCollapseWithEmpty() {
+    // Null service and spanKind are accepted (canonicalize to length-zero) and collapse with
+    // empty-string variants onto the same entry.
+    AggregateTable table = new AggregateTable(8);
+
+    SpanSnapshot allNulls = nullServiceKindSnapshot(null, null);
+    SpanSnapshot allEmpty = nullServiceKindSnapshot("", "");
+
+    AggregateEntry first = table.findOrInsert(allNulls);
+    AggregateEntry secondNull = table.findOrInsert(nullServiceKindSnapshot(null, null));
+    AggregateEntry forEmpty = table.findOrInsert(allEmpty);
+
+    assertSame(first, secondNull, "two null-service/-kind snapshots must hit the same entry");
+    assertSame(first, forEmpty, "null- and empty-service/-kind snapshots must hit the same entry");
+    assertEquals(1, table.size());
+    assertEquals(0, first.getService().length(), "null serviceName should canonicalize to EMPTY");
+    assertEquals(0, first.getSpanKind().length(), "null spanKind should canonicalize to EMPTY");
+  }
+
+  @Test
+  void noPeerTagSnapshotMatchesEntryEvenWithStaleScratchPeerTagValues() {
+    // Models the ring-buffer slot-reuse pattern: a slot whose previous publish fired peer tags
+    // ends up reused for a span with no peer tags. The producer clears peerTagSchema but leaves
+    // peerTagValues populated (the array is the slot's reusable scratch). The consumer must
+    // gate on peerTagSchema and ignore stale scratch values for both hashOf and matches --
+    // otherwise the slot's stale state would steer the snapshot to a different bucket or fail
+    // the field-by-field comparison, producing a duplicate aggregate.
+    AggregateTable table = new AggregateTable(8);
+    SpanSnapshot cleanNoPeerTags = snapshot("svc", "op", "internal");
+    AggregateEntry inserted = table.findOrInsert(cleanNoPeerTags);
+
+    SpanSnapshot withStaleScratch = snapshot("svc", "op", "internal");
+    // Simulate stale scratch: same logical contents (no peer tags -> schema null), but the
+    // values array carries leftover data the slot's prior tag-firing publish wrote into it.
+    withStaleScratch.peerTagSchema = null;
+    withStaleScratch.peerTagValues = new String[] {"leftover.host", "leftover.service"};
+
+    AggregateEntry hit = table.findOrInsert(withStaleScratch);
+    assertSame(inserted, hit, "stale peerTagValues must be ignored when peerTagSchema is null");
+    assertEquals(1, table.size());
+  }
+
+  @Test
+  void aggregateEntryCopiesPeerTagValuesSoSlotReuseDoesntCorruptIt() {
+    // The AggregateEntry must snapshot peerTagValues by value at insert time -- otherwise
+    // when the producer reuses the slot's array on the next publish, the entry's identity
+    // would silently change. Verify by mutating the source array after insert and confirming
+    // the entry still matches its original-content snapshot.
+    AggregateTable table = new AggregateTable(8);
+    String[] values = new String[] {"host1", "svc1"};
+    SpanSnapshot s1 = snapshotWithPeerTags("svc", "op", "client", values);
+    AggregateEntry inserted = table.findOrInsert(s1);
+
+    // Producer "reuses" the array: same reference, new contents.
+    values[0] = "host2";
+    values[1] = "svc2";
+
+    // A FRESH snapshot with the original peer tag values should still match the entry.
+    SpanSnapshot s2 = snapshotWithPeerTags("svc", "op", "client", new String[] {"host1", "svc1"});
+    AggregateEntry hit = table.findOrInsert(s2);
+    assertSame(inserted, hit, "entry's peer tag values must be a snapshot, not a live ref");
+  }
+
+  private static SpanSnapshot snapshotWithPeerTags(
+      String service, String operation, String spanKind, String[] peerTagValues) {
+    PeerTagSchema schema = PeerTagSchema.testSchema(new String[] {"peer.host", "peer.service"});
+    return AggregateEntryTestUtils.buildSnapshot(
+        "resource",
+        service,
+        operation,
+        null,
+        "web",
+        (short) 200,
+        false,
+        true,
+        spanKind,
+        schema,
+        peerTagValues,
+        null,
+        null,
+        null,
+        0L);
+  }
+
+  private static SpanSnapshot nullServiceKindSnapshot(String service, String spanKind) {
+    return AggregateEntryTestUtils.buildSnapshot(
+        "resource",
+        service,
+        "op",
+        null,
+        "web",
+        (short) 200,
+        false,
+        true,
+        spanKind,
+        null,
+        null,
+        null,
+        null,
+        null,
+        0L);
+  }
+
+  private static SpanSnapshot nullableSnapshot(
+      String resource, String operation, String type, String serviceNameSource) {
+    return AggregateEntryTestUtils.buildSnapshot(
+        resource,
+        "svc",
+        operation,
+        serviceNameSource,
+        type,
+        (short) 200,
+        false,
+        true,
+        "client",
+        null,
+        null,
+        null,
+        null,
+        null,
+        0L);
+  }
+
+  // ---------- helpers ----------
+
+  private static SpanSnapshot snapshot(String service, String operation, String spanKind) {
+    return builder(service, operation, spanKind).build();
+  }
+
+  private static SnapshotBuilder builder(String service, String operation, String spanKind) {
+    return new SnapshotBuilder(service, operation, spanKind);
+  }
+
+  private static final class SnapshotBuilder {
+    private final String service;
+    private final String operation;
+    private final String spanKind;
+    private PeerTagSchema peerTagSchema;
+    private String[] peerTagValues;
+    private long tagAndDuration = 0L;
+
+    SnapshotBuilder(String service, String operation, String spanKind) {
+      this.service = service;
+      this.operation = operation;
+      this.spanKind = spanKind;
+    }
+
+    SnapshotBuilder peerTags(String... namesAndValues) {
+      int pairCount = namesAndValues.length / 2;
+      String[] names = new String[pairCount];
+      String[] values = new String[pairCount];
+      for (int i = 0; i < pairCount; i++) {
+        names[i] = namesAndValues[2 * i];
+        values[i] = namesAndValues[2 * i + 1];
+      }
+      this.peerTagSchema = PeerTagSchema.testSchema(names);
+      this.peerTagValues = values;
+      return this;
+    }
+
+    SpanSnapshot build() {
+      return AggregateEntryTestUtils.buildSnapshot(
+          "resource",
+          service,
+          operation,
+          null,
+          "web",
+          (short) 200,
+          false,
+          true,
+          spanKind,
+          peerTagSchema,
+          peerTagValues,
+          null,
+          null,
+          null,
+          tagAndDuration);
+    }
+  }
+}
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBootstrapTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBootstrapTest.java
index ef07e0fbc19..59681d4724e 100644
--- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBootstrapTest.java
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorBootstrapTest.java
@@ -217,9 +217,9 @@ void reconcileSurvivesStateChangeWhenTagsUnchanged() throws Exception {
   void reconcileSwapsSchemaWhenTagSetChanges() throws Exception {
     // The reconcile slow-path's swap branch: discovery refreshes the state AND the tag set
     // grows. Cached schema is rebuilt and the volatile reference points at the new schema.
-    // Verification is end-to-end -- we look at the MetricKey the writer receives. Pre-swap the
-    // span snapshot was pinned to the old schema so only peer.hostname appears; post-swap a new
-    // publish reads the new schema and the next flush carries both peer tags.
+    // Verification is end-to-end -- we look at the AggregateEntry the writer receives. Pre-swap
+    // the span snapshot was pinned to the old schema so only peer.hostname appears; post-swap a
+    // new publish reads the new schema and the next flush carries both peer tags.
     HealthMetrics healthMetrics = mock(HealthMetrics.class);
     MetricWriter writer = mock(MetricWriter.class);
     Sink sink = mock(Sink.class);
@@ -266,8 +266,8 @@ void reconcileSwapsSchemaWhenTagSetChanges() throws Exception {
           .finishBucket();
 
       // Publish 1: snapshot pinned to the original {peer.hostname} schema. cycle 1's reconcile
-      // will swap the cached schema BEFORE the flush, but this snapshot is already pinned so its
-      // MetricKey will still carry only peer.hostname.
+      // will swap the cached schema BEFORE the flush, but this snapshot is already pinned so the
+      // resulting AggregateEntry will still carry only peer.hostname.
       aggregator.publish(
           Collections.<CoreSpan<?>>singletonList(peerAggregationSpanWithBothPeerTags()));
       aggregator.report();
@@ -280,20 +280,20 @@ void reconcileSwapsSchemaWhenTagSetChanges() throws Exception {
       aggregator.report();
       assertTrue(cycle2.await(2, SECONDS));
 
-      // Capture every (MetricKey, AggregateMetric) the writer saw across both cycles. Pre-swap
-      // snapshot has 1 peer tag, post-swap has 2.
-      ArgumentCaptor<MetricKey> keyCaptor = ArgumentCaptor.forClass(MetricKey.class);
-      verify(writer, times(2)).add(keyCaptor.capture(), any(AggregateMetric.class));
-      List<MetricKey> keys = keyCaptor.getAllValues();
+      // Capture every AggregateEntry the writer saw across both cycles. Pre-swap snapshot has 1
+      // peer tag, post-swap has 2.
+      ArgumentCaptor<AggregateEntry> entryCaptor = ArgumentCaptor.forClass(AggregateEntry.class);
+      verify(writer, times(2)).add(entryCaptor.capture());
+      List<AggregateEntry> entries = entryCaptor.getAllValues();
       assertEquals(
           Collections.singletonList(UTF8BytesString.create("peer.hostname:localhost")),
-          keys.get(0).getPeerTags(),
+          entries.get(0).getPeerTags(),
           "pre-swap snapshot should encode only peer.hostname");
       assertEquals(
           Arrays.asList(
               UTF8BytesString.create("peer.hostname:localhost"),
               UTF8BytesString.create("peer.service:billing")),
-          keys.get(1).getPeerTags(),
+          entries.get(1).getPeerTags(),
           "post-swap snapshot should encode both peer.hostname and peer.service");
 
       // Bootstrap (1) + cycle 1 slow-path (1) -- cycle 2 is fast-path so doesn't reach peerTags().
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDisableTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDisableTest.java
new file mode 100644
index 00000000000..d072371d25d
--- /dev/null
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/ConflatingMetricsAggregatorDisableTest.java
@@ -0,0 +1,234 @@
+package datadog.trace.common.metrics;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.after;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.reset;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import datadog.communication.ddagent.DDAgentFeaturesDiscovery;
+import datadog.trace.bootstrap.instrumentation.api.Tags;
+import datadog.trace.core.CoreSpan;
+import datadog.trace.core.SpanKindFilter;
+import datadog.trace.core.monitor.HealthMetrics;
+import java.util.Collections;
+import java.util.concurrent.CountDownLatch;
+import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+
+/**
+ * Coverage for the {@code disable() -> ClearSignal.CLEAR} threading routing introduced in this PR.
+ *
+ * <p>The bundled fix routes the agent-downgrade clear through the inbox so the aggregator thread
+ * stays the sole writer to {@link AggregateTable} (which is not thread-safe). The behavioral
+ * contract this test pins:
+ *
+ * <ul>
+ *   <li>{@code onEvent(DOWNGRADED)} can fire from a non-aggregator thread (in production, the
+ *       OkHttpSink callback thread).
+ *   <li>By the time the next report cycle reconciles peer-tag schema on the aggregator thread, the
+ *       {@code AggregateTable} has been cleared -- {@code CLEAR} arrived in the FIFO inbox before
+ *       the {@code REPORT} signal triggered by {@code aggregator.report()}.
+ *   <li>The aggregator therefore flushes nothing on that next report cycle: no {@code startBucket},
+ *       no {@code add}, no {@code finishBucket}.
+ * </ul>
+ *
+ * <p>The test would fail if {@code disable()} reverted to mutating {@code AggregateTable} directly
+ * (the pre-fix path) only via races -- not deterministically -- so the assertions here are about
+ * the observable end-to-end shape rather than thread identity.
+ */
+class ConflatingMetricsAggregatorDisableTest {
+
+  @Test
+  void downgradeRoutesClearThroughInboxBeforeNextReport() throws Exception {
+    HealthMetrics healthMetrics = mock(HealthMetrics.class);
+    MetricWriter writer = mock(MetricWriter.class);
+    Sink sink = mock(Sink.class);
+    DDAgentFeaturesDiscovery features = mock(DDAgentFeaturesDiscovery.class);
+    when(features.supportsMetrics()).thenReturn(true);
+    when(features.peerTags()).thenReturn(Collections.<String>emptySet());
+    when(features.state()).thenReturn("state-1");
+
+    ConflatingMetricsAggregator aggregator =
+        new ConflatingMetricsAggregator(
+            Collections.<String>emptySet(),
+            features,
+            healthMetrics,
+            sink,
+            writer,
+            /* maxAggregates */ 16,
+            /* queueSize */ 64,
+            /* reportingInterval */ 10,
+            SECONDS,
+            /* includeEndpointInMetrics */ false);
+    aggregator.start();
+    try {
+      // Baseline: publish a span, run a report, verify the table flushes normally. This gives
+      // us a clean post-first-report state with the aggregator's reconcile already having fired
+      // once on the aggregator thread.
+      CountDownLatch firstFlush = new CountDownLatch(1);
+      org.mockito.Mockito.doAnswer(
+              invocation -> {
+                firstFlush.countDown();
+                return null;
+              })
+          .when(writer)
+          .finishBucket();
+
+      aggregator.publish(Collections.<CoreSpan<?>>singletonList(metricsEligibleSpan()));
+      aggregator.report();
+      assertTrue(firstFlush.await(2, SECONDS));
+
+      // Reset writer-side mock interactions so the post-disable verify() blocks below only see
+      // what happens after the downgrade. features mock keeps accumulating call counts -- we use
+      // those counts as a latch on aggregator-thread reconcile timing.
+      reset(writer);
+
+      // Flip the discovery state. disable()'s first action is features.discover() followed by a
+      // features.supportsMetrics() check; returning false here selects the clear path.
+      when(features.supportsMetrics()).thenReturn(false);
+
+      // Fire DOWNGRADED on the test thread. This is the production scenario where the OkHttpSink
+      // callback thread triggers onEvent. disable() offers ClearSignal.CLEAR to the inbox but
+      // does not (and must not) mutate AggregateTable directly here.
+      aggregator.onEvent(EventListener.EventType.DOWNGRADED, "");
+
+      // First: verify nothing flushes immediately after disable. We can't pin reconcile-on-the-
+      // aggregator-thread as a latch here because CLEAR's inbox.clear() drops any REPORT we'd
+      // queue behind it -- so we just wait a window for any flush attempt to materialize.
+      verify(writer, after(500).never()).startBucket(anyInt(), anyLong(), anyLong());
+
+      // Stronger contract: prove the table is actually empty after CLEAR by re-enabling metrics
+      // and publishing a *marker* span with a distinct resource name. The next report should
+      // flush exactly one entry -- the marker -- with the original "resource" gone. If disable()
+      // had failed to clear the table (or had cleared it from the wrong thread and corrupted
+      // bucket chains), this assertion would catch it.
+      when(features.supportsMetrics()).thenReturn(true);
+      CountDownLatch postClearFlush = new CountDownLatch(1);
+      org.mockito.Mockito.doAnswer(
+              invocation -> {
+                postClearFlush.countDown();
+                return null;
+              })
+          .when(writer)
+          .finishBucket();
+      aggregator.publish(Collections.<CoreSpan<?>>singletonList(markerSpan()));
+      aggregator.report();
+      assertTrue(postClearFlush.await(2, SECONDS));
+
+      ArgumentCaptor<AggregateEntry> entryCaptor = ArgumentCaptor.forClass(AggregateEntry.class);
+      verify(writer, times(1)).add(entryCaptor.capture());
+      assertEquals(
+          "marker-resource",
+          entryCaptor.getValue().getResource().toString(),
+          "post-CLEAR bucket should contain only the marker -- the original entry was wiped");
+    } finally {
+      aggregator.close();
+    }
+  }
+
+  @Test
+  void clearDoesNotTrampleQueuedStopSignal() throws Exception {
+    // CLEAR handler clears only the aggregates table; queued signals (STOP, REPORT) survive and
+    // get processed normally.
+    HealthMetrics healthMetrics = mock(HealthMetrics.class);
+    MetricWriter writer = mock(MetricWriter.class);
+    Sink sink = mock(Sink.class);
+    DDAgentFeaturesDiscovery features = mock(DDAgentFeaturesDiscovery.class);
+    when(features.supportsMetrics()).thenReturn(true);
+    when(features.peerTags()).thenReturn(Collections.<String>emptySet());
+    when(features.state()).thenReturn("state-1");
+
+    ConflatingMetricsAggregator aggregator =
+        new ConflatingMetricsAggregator(
+            Collections.<String>emptySet(),
+            features,
+            healthMetrics,
+            sink,
+            writer,
+            /* maxAggregates */ 16,
+            /* queueSize */ 64,
+            /* reportingInterval */ 10,
+            SECONDS,
+            /* includeEndpointInMetrics */ false);
+    aggregator.start();
+
+    // Force at least one snapshot into the inbox so the aggregator has something to drain.
+    aggregator.publish(Collections.<CoreSpan<?>>singletonList(metricsEligibleSpan()));
+
+    // Fire DOWNGRADED on this thread. disable() flips supportsMetrics() to false and offers
+    // CLEAR. Then immediately call close() which offers STOP. If CLEAR's handler clears the
+    // inbox, STOP gets trampled and close() hangs until the join timeout.
+    when(features.supportsMetrics()).thenReturn(false);
+    aggregator.onEvent(EventListener.EventType.DOWNGRADED, "");
+
+    // close() is synchronous; bound it ourselves rather than trusting THREAD_JOIN_TIMEOUT_MS.
+    long deadlineNanos = System.nanoTime() + java.util.concurrent.TimeUnit.SECONDS.toNanos(2);
+    Thread closer = new Thread(aggregator::close, "test-closer");
+    closer.start();
+    while (closer.isAlive() && System.nanoTime() < deadlineNanos) {
+      closer.join(50);
+    }
+    assertTrue(
+        !closer.isAlive(),
+        "close() must return promptly -- if CLEAR trampled STOP, this hangs out the join timeout");
+  }
+
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  private static CoreSpan<?> metricsEligibleSpan() {
+    CoreSpan span = mock(CoreSpan.class);
+    when(span.isMeasured()).thenReturn(false);
+    when(span.isTopLevel()).thenReturn(true);
+    // Return true for any SpanKindFilter so peerTagSchemaFor enters the bootstrap path on the
+    // first publish. We want that bootstrap to fire (it's what makes features.state()
+    // observable), even though peerTags() returns emptySet here and the resulting schema has
+    // size 0.
+    when(span.isKind(any(SpanKindFilter.class))).thenReturn(true);
+    when(span.getLongRunningVersion()).thenReturn(0);
+    when(span.getDurationNano()).thenReturn(100L);
+    when(span.getError()).thenReturn(0);
+    when(span.getResourceName()).thenReturn("resource");
+    when(span.getServiceName()).thenReturn("svc");
+    when(span.getOperationName()).thenReturn("op");
+    when(span.getServiceNameSource()).thenReturn(null);
+    when(span.getType()).thenReturn("web");
+    when(span.getHttpStatusCode()).thenReturn((short) 200);
+    when(span.getParentId()).thenReturn(0L);
+    when(span.getOrigin()).thenReturn(null);
+    when(span.unsafeGetTag(eq(Tags.SPAN_KIND), any(CharSequence.class))).thenReturn("client");
+    return span;
+  }
+
+  /**
+   * Distinct from {@link #metricsEligibleSpan()} via the resource name: post-CLEAR the writer
+   * should see "marker-resource", proving the original "resource" entry is gone from the table.
+   */
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  private static CoreSpan<?> markerSpan() {
+    CoreSpan span = mock(CoreSpan.class);
+    when(span.isMeasured()).thenReturn(false);
+    when(span.isTopLevel()).thenReturn(true);
+    when(span.isKind(any(SpanKindFilter.class))).thenReturn(true);
+    when(span.getLongRunningVersion()).thenReturn(0);
+    when(span.getDurationNano()).thenReturn(100L);
+    when(span.getError()).thenReturn(0);
+    when(span.getResourceName()).thenReturn("marker-resource");
+    when(span.getServiceName()).thenReturn("svc");
+    when(span.getOperationName()).thenReturn("op");
+    when(span.getServiceNameSource()).thenReturn(null);
+    when(span.getType()).thenReturn("web");
+    when(span.getHttpStatusCode()).thenReturn((short) 200);
+    when(span.getParentId()).thenReturn(0L);
+    when(span.getOrigin()).thenReturn(null);
+    when(span.unsafeGetTag(eq(Tags.SPAN_KIND), any(CharSequence.class))).thenReturn("client");
+    return span;
+  }
+}
diff --git a/dd-trace-core/src/traceAgentTest/groovy/MetricsIntegrationTest.groovy b/dd-trace-core/src/traceAgentTest/groovy/MetricsIntegrationTest.groovy
index 2972ffa2c18..4c4ee81b276 100644
--- a/dd-trace-core/src/traceAgentTest/groovy/MetricsIntegrationTest.groovy
+++ b/dd-trace-core/src/traceAgentTest/groovy/MetricsIntegrationTest.groovy
@@ -7,15 +7,14 @@ import datadog.metrics.api.Histograms
 import datadog.metrics.impl.DDSketchHistograms
 import datadog.trace.api.Config
 import datadog.trace.api.WellKnownTags
-import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString
-import datadog.trace.common.metrics.AggregateMetric
+import datadog.trace.common.metrics.AggregateEntry
 import datadog.trace.common.metrics.EventListener
-import datadog.trace.common.metrics.MetricKey
 import datadog.trace.common.metrics.OkHttpSink
+import datadog.trace.common.metrics.PeerTagSchema
 import datadog.trace.common.metrics.SerializingMetricWriter
+import datadog.trace.common.metrics.SpanSnapshot
 import java.util.concurrent.CopyOnWriteArrayList
 import java.util.concurrent.CountDownLatch
-import java.util.concurrent.atomic.AtomicLongArray
 import okhttp3.HttpUrl
 
 class MetricsIntegrationTest extends AbstractTraceAgentTest {
@@ -39,14 +38,22 @@ class MetricsIntegrationTest extends AbstractTraceAgentTest {
       sink
       )
     writer.startBucket(2, System.nanoTime(), SECONDS.toNanos(10))
-    writer.add(
-      new MetricKey("resource1", "service1", "operation1", null, "sql", 0, false, true, "xyzzy", [UTF8BytesString.create("grault:quux")], null, null, null),
-      new AggregateMetric().recordDurations(5, new AtomicLongArray(2, 1, 2, 250, 4, 5))
-      )
-    writer.add(
-      new MetricKey("resource2", "service2", "operation2", null, "web", 200, false, true, "xyzzy", [UTF8BytesString.create("grault:quux")], null, null, null),
-      new AggregateMetric().recordDurations(10, new AtomicLongArray(1, 1, 200, 2, 3, 4, 5, 6, 7, 8, 9))
-      )
+    // Build entries via the production AggregateEntry.forSnapshot(snap, keyHash) path -- same
+    // construction as AggregateTable.findOrInsert. Both entries use one peer tag (grault:quux)
+    // -> schema names=["grault"], values=["quux"].
+    PeerTagSchema schema = PeerTagSchema.testSchema(["grault"] as String[])
+    SpanSnapshot snap1 = new SpanSnapshot(
+      "resource1", "service1", "operation1", null, "sql", (short) 0,
+      false, true, "xyzzy", schema, ["quux"] as String[], null, null, null, 0L)
+    def entry1 = new AggregateEntry(snap1, AggregateEntry.hashOf(snap1))
+    [2, 1, 2, 250, 4].each { entry1.recordOneDuration(it as long) }
+    writer.add(entry1)
+    SpanSnapshot snap2 = new SpanSnapshot(
+      "resource2", "service2", "operation2", null, "web", (short) 200,
+      false, true, "xyzzy", schema, ["quux"] as String[], null, null, null, 0L)
+    def entry2 = new AggregateEntry(snap2, AggregateEntry.hashOf(snap2))
+    [1, 1, 200, 2, 3, 4, 5, 6, 7, 8].each { entry2.recordOneDuration(it as long) }
+    writer.add(entry2)
     writer.finishBucket()
 
     then:
diff --git a/internal-api/src/jmh/java/datadog/trace/util/concurrent/MpscRingBufferBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/concurrent/MpscRingBufferBenchmark.java
new file mode 100644
index 00000000000..e9d084f3334
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/concurrent/MpscRingBufferBenchmark.java
@@ -0,0 +1,146 @@
+package datadog.trace.util.concurrent;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+
+import java.util.function.BiConsumer;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Group;
+import org.openjdk.jmh.annotations.GroupThreads;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Throughput benchmarks for {@link MpscRingBuffer}.
+ *
+ * <ul>
+ *   <li>{@code write_1p / write_8p / write_16p} — producer-side throughput with a background
+ *       drainer consuming what's published. Measures the cost of one {@code tryWrite} including CAS
+ *       contention on the producer cursor at the given thread count.
+ *   <li>{@code e2e_8p} — JMH {@code @Group} pairing 8 producers with 1 consumer. Aggregate
+ *       throughput reflects whichever side is the bottleneck under realistic pressure.
+ * </ul>
+ *
+ * <p>Run with {@code -p capacity=...} to override the default ring capacity.
+ */
+@State(Scope.Benchmark)
+@Warmup(iterations = 2, time = 15, timeUnit = SECONDS)
+@Measurement(iterations = 5, time = 15, timeUnit = SECONDS)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(SECONDS)
+@Fork(value = 1)
+public class MpscRingBufferBenchmark {
+
+  /**
+   * Static filler so the lambda is non-capturing and the JIT can hoist it past the {@code tryWrite}
+   * call. Context arg comes first, slot last — matches {@code TagMap.forEach} convention.
+   */
+  private static final BiConsumer<Long, Slot> FILLER = (v, slot) -> slot.value = v;
+
+  /** Mutable slot. Replicates the per-publish allocation a real producer wants to avoid. */
+  public static final class Slot {
+    long value;
+  }
+
+  @Param({"1024"})
+  public int capacity;
+
+  /**
+   * Shared ring for the {@code write_*} benches. A background drainer keeps space available so
+   * producer benchmarks measure write throughput rather than full-ring drop throughput.
+   */
+  MpscRingBuffer<Slot> ring;
+
+  private volatile boolean stopDrainer;
+  private Thread drainerThread;
+
+  /**
+   * Separate ring for the {@code e2e_*} group benches. JMH drives both sides directly so we don't
+   * want our own background drainer for those.
+   */
+  MpscRingBuffer<Slot> e2eRing;
+
+  @Setup(Level.Trial)
+  public void setup() {
+    ring = new MpscRingBuffer<>(Slot::new, capacity);
+    e2eRing = new MpscRingBuffer<>(Slot::new, capacity);
+    stopDrainer = false;
+    drainerThread =
+        new Thread(
+            () -> {
+              while (!stopDrainer) {
+                if (ring.drain((Slot s) -> {}) == 0) Thread.yield();
+              }
+            },
+            "MpscRingBufferBenchmark-drainer");
+    drainerThread.setDaemon(true);
+    drainerThread.start();
+  }
+
+  @TearDown(Level.Trial)
+  public void teardown() throws InterruptedException {
+    stopDrainer = true;
+    drainerThread.join(5_000);
+  }
+
+  @State(Scope.Thread)
+  public static class ThreadState {
+    long counter;
+  }
+
+  // ============ Write throughput with background drainer ============
+
+  @Threads(1)
+  @Benchmark
+  public boolean write_1p(ThreadState ts) {
+    return ring.tryWrite(ts.counter++, FILLER);
+  }
+
+  @Threads(8)
+  @Benchmark
+  public boolean write_8p(ThreadState ts) {
+    return ring.tryWrite(ts.counter++, FILLER);
+  }
+
+  @Threads(16)
+  @Benchmark
+  public boolean write_16p(ThreadState ts) {
+    return ring.tryWrite(ts.counter++, FILLER);
+  }
+
+  // ============ End-to-end producer/consumer pairing ============
+  //
+  // JMH runs both methods in the same trial: 8 producer threads + 1 consumer thread. Throughput
+  // is reported as ops/sec aggregated across all 9 threads, but the consumer's drain count
+  // dwarfs the producer ops since one call processes many slots -- in practice the bottleneck
+  // is the producer side (CAS contention), and that's what the number reflects.
+
+  private static final BiConsumer<Blackhole, Slot> CONSUMER = (bh, slot) -> bh.consume(slot.value);
+
+  @Group("e2e_8p")
+  @GroupThreads(8)
+  @Benchmark
+  public boolean e2e_producer(ThreadState ts) {
+    return e2eRing.tryWrite(ts.counter++, FILLER);
+  }
+
+  @Group("e2e_8p")
+  @GroupThreads(1)
+  @Benchmark
+  public int e2e_consumer(Blackhole bh) {
+    int drained = e2eRing.drain(bh, CONSUMER);
+    if (drained == 0) Thread.yield();
+    return drained;
+  }
+}
diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java
index 07f10672273..b90fcb0fd87 100644
--- a/internal-api/src/main/java/datadog/trace/api/Config.java
+++ b/internal-api/src/main/java/datadog/trace/api/Config.java
@@ -2184,7 +2184,23 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
         configProvider.getBoolean(TRACE_STATS_COMPUTATION_IGNORE_AGENT_VERSION, false);
     tracerMetricsBufferingEnabled =
         configProvider.getBoolean(TRACER_METRICS_BUFFERING_ENABLED, false);
-    tracerMetricsMaxAggregates = configProvider.getInteger(TRACER_METRICS_MAX_AGGREGATES, 2048);
+    // The MpscRingBuffer pre-allocates one SpanSnapshot per slot at construction, so capacity
+    // becomes a resident-RSS cost. The old TRACER_METRICS_MAX_PENDING default of 2048 (logical)
+    // was sized for the previous on-demand batch-allocation model; with the ring it would pin
+    // ~15 MB upfront for what only needs to absorb sub-second consumer stalls. Cut the default
+    // accordingly:
+    //   - normal heap: 128 logical * 64 LEGACY_BATCH_SIZE = 8192 slots, ~1 MB upfront, ~0.8 s
+    //     of buffer at 10K spans/s. Plenty of margin for GC pauses.
+    //   - tight heap (<128 MB): 64 logical * 64 = 4096 slots, ~500 KB upfront. Observed
+    //     catastrophic at Xmx64m with the prior 131072-slot default (Full-GC death spiral).
+    // Customers who explicitly configured TRACER_METRICS_MAX_PENDING keep their value (the
+    // LEGACY_BATCH_SIZE multiplier still applies to it) -- only the implicit default shrinks.
+    final boolean tightHeap = Runtime.getRuntime().maxMemory() < 128L * 1024 * 1024;
+    final int defaultMaxAggregates = tightHeap ? 256 : 2048;
+    final int defaultMaxPending = tightHeap ? 64 : 128;
+
+    tracerMetricsMaxAggregates =
+        configProvider.getInteger(TRACER_METRICS_MAX_AGGREGATES, defaultMaxAggregates);
     /*
      * TRACER_METRICS_MAX_PENDING historically counted conflating Batch slots (~64 spans per batch
      * via Batch.MAX_BATCH_SIZE). The inbox now holds 1 SpanSnapshot per metrics-eligible span, so
@@ -2199,7 +2215,8 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
      * see java.util.ArraysSupport.SOFT_MAX_ARRAY_LENGTH for the same convention.
      */
     long requestedMaxPending =
-        (long) configProvider.getInteger(TRACER_METRICS_MAX_PENDING, 2048) * LEGACY_BATCH_SIZE;
+        (long) configProvider.getInteger(TRACER_METRICS_MAX_PENDING, defaultMaxPending)
+            * LEGACY_BATCH_SIZE;
     tracerMetricsMaxPending = (int) Math.min(requestedMaxPending, MAX_SAFE_ARRAY_SIZE);
 
     reportHostName =
diff --git a/internal-api/src/main/java/datadog/trace/util/Hashtable.java b/internal-api/src/main/java/datadog/trace/util/Hashtable.java
index 8f40e4609bc..ff3202c1f33 100644
--- a/internal-api/src/main/java/datadog/trace/util/Hashtable.java
+++ b/internal-api/src/main/java/datadog/trace/util/Hashtable.java
@@ -482,7 +482,24 @@ MutatingBucketIterator<TEntry> mutatingBucketIterator(
      */
     public static final <TEntry extends Hashtable.Entry>
         MutatingTableIterator<TEntry> mutatingTableIterator(Hashtable.Entry[] buckets) {
-      return new MutatingTableIterator<TEntry>(buckets);
+      return new MutatingTableIterator<TEntry>(buckets, 0, buckets.length);
+    }
+
+    /**
+     * Variant of {@link #mutatingTableIterator(Hashtable.Entry[])} that walks only the half-open
+     * bucket range {@code [startBucket, endBucket)}. Useful for resumable sweeps -- e.g. cursor-
+     * based eviction in {@code AggregateTable} -- where one call drives {@code [cursor, length)}
+     * and a wrap-around call drives {@code [0, cursor)}. The iterator does <b>not</b> wrap around
+     * within a single instance; callers compose two iterators when wrap-around is desired. An empty
+     * range ({@code startBucket == endBucket}) produces an immediately exhausted iterator.
+     *
+     * @param startBucket inclusive lower bound; must be in {@code [0, buckets.length]}.
+     * @param endBucket exclusive upper bound; must be in {@code [startBucket, buckets.length]}.
+     */
+    public static final <TEntry extends Hashtable.Entry>
+        MutatingTableIterator<TEntry> mutatingTableIterator(
+            Hashtable.Entry[] buckets, int startBucket, int endBucket) {
+      return new MutatingTableIterator<TEntry>(buckets, startBucket, endBucket);
     }
 
     public static final int bucketIndex(Object[] buckets, long keyHash) {
@@ -752,6 +769,9 @@ public static final class MutatingTableIterator<TEntry extends Entry>
       implements Iterator<TEntry> {
     private final Hashtable.Entry[] buckets;
 
+    /** Exclusive upper bound for bucket indices visited by this iterator. */
+    private final int endBucket;
+
     /**
      * Index of the bucket holding {@link #nextEntry} (or holding {@link #curEntry} after remove).
      */
@@ -782,9 +802,34 @@ public static final class MutatingTableIterator<TEntry extends Entry>
      */
     private Hashtable.Entry curEntry;
 
-    MutatingTableIterator(Hashtable.Entry[] buckets) {
+    MutatingTableIterator(Hashtable.Entry[] buckets, int startBucket, int endBucket) {
       this.buckets = buckets;
-      seekFromBucket(0);
+      if (startBucket < 0 || startBucket > buckets.length) {
+        throw new IndexOutOfBoundsException(
+            "startBucket " + startBucket + " out of range [0, " + buckets.length + "]");
+      }
+      if (endBucket < startBucket || endBucket > buckets.length) {
+        throw new IndexOutOfBoundsException(
+            "endBucket "
+                + endBucket
+                + " out of range ["
+                + startBucket
+                + ", "
+                + buckets.length
+                + "]");
+      }
+      this.endBucket = endBucket;
+      seekFromBucket(startBucket);
+    }
+
+    /**
+     * Bucket index of the entry last returned by {@link #next()}, or {@code -1} if {@code next} has
+     * not yet been called or the most recent call was {@link #remove()}. Useful for callers driving
+     * a cursor — e.g. resumable eviction sweeps that want to remember where the last successful
+     * removal landed.
+     */
+    public int currentBucket() {
+      return this.curBucketIndex;
     }
 
     @Override
@@ -841,12 +886,12 @@ public void remove() {
     }
 
     /**
-     * Advance {@code nextBucketIndex} / {@code nextEntry} to the first non-empty bucket >= {@code
-     * from}.
+     * Advance {@code nextBucketIndex} / {@code nextEntry} to the first non-empty bucket {@code >=
+     * from} within {@code [0, endBucket)}.
      */
     private void seekFromBucket(int from) {
       Hashtable.Entry[] thisBuckets = this.buckets;
-      for (int i = from; i < thisBuckets.length; i++) {
+      for (int i = from; i < this.endBucket; i++) {
         Hashtable.Entry head = thisBuckets[i];
         if (head != null) {
           this.nextBucketIndex = i;
diff --git a/internal-api/src/main/java/datadog/trace/util/concurrent/MpscRingBuffer.java b/internal-api/src/main/java/datadog/trace/util/concurrent/MpscRingBuffer.java
new file mode 100644
index 00000000000..4cefa43cac0
--- /dev/null
+++ b/internal-api/src/main/java/datadog/trace/util/concurrent/MpscRingBuffer.java
@@ -0,0 +1,434 @@
+package datadog.trace.util.concurrent;
+
+import datadog.trace.api.function.TriConsumer;
+import java.util.concurrent.atomic.AtomicLongArray;
+import java.util.concurrent.atomic.AtomicLongFieldUpdater;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+// Disruptor-style cache-line padding around the cursors. The two cursors live on different cache
+// lines so consumer-side writes to consumerCursor don't invalidate the line producers read for
+// producerCursor (and vice versa). Each padding class declares 7 longs (56 bytes); combined with
+// the cursor's own 8 bytes plus the JVM object header, each cursor + its surrounding pad fills
+// a 64-byte cache line. The HotSpot field-layout strategy preserves the declaration order across
+// the class hierarchy, so this pattern is reliable on all production JVMs we target.
+
+abstract class MpscRingBufferPad0 {
+  long p01, p02, p03, p04, p05, p06, p07;
+}
+
+abstract class MpscRingBufferProducerCursor extends MpscRingBufferPad0 {
+  /** Next sequence to claim. Producers increment via CAS through {@code PRODUCER_CURSOR}. */
+  volatile long producerCursor = -1L;
+}
+
+abstract class MpscRingBufferPad1 extends MpscRingBufferProducerCursor {
+  long p11, p12, p13, p14, p15, p16, p17;
+}
+
+abstract class MpscRingBufferConsumerCursor extends MpscRingBufferPad1 {
+  /** Highest sequence consumed. Only the consumer thread writes; producers read volatile. */
+  volatile long consumerCursor = -1L;
+}
+
+abstract class MpscRingBufferPad2 extends MpscRingBufferConsumerCursor {
+  long p21, p22, p23, p24, p25, p26, p27;
+}
+
+/**
+ * Bounded multi-producer / single-consumer ring buffer of pre-allocated {@code T} instances.
+ *
+ * <p>Each slot is a long-lived {@code T} instance created at construction time via the supplied
+ * factory and recycled forever. Producers mutate slots in place via callbacks; the consumer reads
+ * them the same way. No allocation occurs per write/read after construction, which is the entire
+ * point of this class over a queue of references.
+ *
+ * <p>The {@code BiConsumer} and {@link TriConsumer} variants take their context object(s)
+ * <i>before</i> the slot, matching the convention of {@code TagMap.forEach} and {@code
+ * Hashtable.forEach}. That ordering lets callers declare the callback as a {@code static final}
+ * non-capturing lambda and pass per-call context at the call site without allocating a closure.
+ *
+ * <h2>Thread safety contract</h2>
+ *
+ * <p>The ring buffer is thread-safe for any number of producer threads plus exactly one consumer
+ * thread. Calling {@code drain} from multiple threads concurrently is <b>not</b> supported and will
+ * corrupt state.
+ *
+ * <p>For the slot type {@code T}:
+ *
+ * <ul>
+ *   <li><b>Slot fields can be plain</b> ({@code int}, {@code long}, object references) -- they do
+ *       <i>not</i> need to be {@code volatile} or guarded by synchronization. Happens-before
+ *       between the producer's slot mutation and the consumer's slot read is provided by the ring's
+ *       internal publication-sequence machinery: a release write on the per-slot sequence inside
+ *       {@code tryWrite}, paired with an acquire read inside {@code drain}.
+ *   <li><b>Don't retain slot references past your handler's return.</b> Once a {@code tryWrite}
+ *       filler returns, the slot becomes visible to the consumer; once a {@code drain} handler
+ *       returns, the slot may be reclaimed by another producer and its fields overwritten. If the
+ *       consumer needs to keep any state from a slot, it must extract by value (or copy references)
+ *       before returning.
+ *   <li><b>Don't expose slot references outside the ring.</b> Treat {@code T} as ring-buffer-owned;
+ *       sharing a slot reference with code that doesn't follow the same discipline breaks the
+ *       happens-before story.
+ * </ul>
+ *
+ * <p>For producer fillers:
+ *
+ * <ul>
+ *   <li>Filler invocations on the same slot are serialized (one producer wins the sequence CAS), so
+ *       the filler can write fields without synchronization.
+ *   <li><b>If a filler throws, the slot is published anyway</b> (with whatever the filler had
+ *       written so far) and the exception propagates to the caller. This prevents the consumer from
+ *       getting stuck waiting for an unfinished slot; the cost is that the consumer may observe a
+ *       partially-filled or stale-fielded slot. Fillers should be written to either not throw or to
+ *       leave the slot in a state the consumer can recognize and skip.
+ * </ul>
+ *
+ * <h2>Implementation</h2>
+ *
+ * <p>Producer cursor is CAS-claimed; visibility of a claimed slot to the consumer is gated by a
+ * per-slot publication-sequence array. Consumer cursor is updated with a volatile write so
+ * producers observe space being freed. Cursors are cache-line-padded against each other (see the
+ * {@code MpscRingBufferPad*} hierarchy at the top of this file) and the publication-sequence array
+ * is strided so each logical entry occupies a distinct cache line.
+ */
+public final class MpscRingBuffer<T> extends MpscRingBufferPad2 {
+
+  /**
+   * Cache line size in {@code long}-units. 64-byte cache lines on every common CPU we ship to (x86,
+   * ARM); 8 bytes per long. Each logical slot in {@link #publishedSequences} is spread out by this
+   * stride so adjacent logical sequences don't share a cache line and don't ping-pong between
+   * producer cores under heavy contention.
+   */
+  private static final int CACHE_LINE_LONGS = 8;
+
+  @SuppressWarnings("rawtypes") // AtomicLongFieldUpdater can't take a parameterized class
+  private static final AtomicLongFieldUpdater<MpscRingBufferProducerCursor> PRODUCER_CURSOR =
+      AtomicLongFieldUpdater.newUpdater(MpscRingBufferProducerCursor.class, "producerCursor");
+
+  private final T[] slots;
+
+  /**
+   * Per-slot publication sequence, strided by {@link #CACHE_LINE_LONGS} to avoid false sharing.
+   * Producers write the claimed sequence here as the last step of a publish (release write via
+   * {@link AtomicLongArray#set}); the consumer reads it (acquire read) to determine whether the
+   * slot at the next position is ready. A slot is considered published for sequence {@code s} iff
+   * {@code publishedSequences[(s & mask) * CACHE_LINE_LONGS] == s}.
+   */
+  private final AtomicLongArray publishedSequences;
+
+  private final int capacity;
+  private final int mask;
+
+  @SuppressWarnings("unchecked")
+  public MpscRingBuffer(final Supplier<T> factory, final int capacityHint) {
+    if (capacityHint < 1) {
+      throw new IllegalArgumentException("capacityHint must be >= 1, got " + capacityHint);
+    }
+    this.capacity = nextPowerOfTwo(capacityHint);
+    if (this.capacity < 1) {
+      throw new IllegalArgumentException("capacity overflow for hint " + capacityHint);
+    }
+    this.mask = capacity - 1;
+    this.slots = (T[]) new Object[capacity];
+    this.publishedSequences = new AtomicLongArray(capacity * CACHE_LINE_LONGS);
+    for (int i = 0; i < capacity; i++) {
+      slots[i] = factory.get();
+      // Initial: sentinel "no sequence published here yet" -- anything < 0 works since
+      // sequences are 0-based and monotonically increasing.
+      publishedSequences.set(i * CACHE_LINE_LONGS, Long.MIN_VALUE);
+    }
+  }
+
+  public int capacity() {
+    return capacity;
+  }
+
+  /** Approximate count of slots holding unread items. May briefly exceed capacity under race. */
+  public int size() {
+    final long p = producerCursor;
+    final long c = consumerCursor;
+    final long diff = p - c;
+    if (diff <= 0) return 0;
+    if (diff > capacity) return capacity;
+    return (int) diff;
+  }
+
+  public boolean isEmpty() {
+    return producerCursor == consumerCursor;
+  }
+
+  /** {@code true} if the slot was filled and published; {@code false} if the ring is full. */
+  public boolean tryWrite(final Consumer<? super T> filler) {
+    final long seq = claim();
+    if (seq < 0L) return false;
+    // publish in finally so a throwing filler doesn't leave the slot un-published -- the
+    // consumer would otherwise wait at that sequence forever. See class javadoc.
+    try {
+      filler.accept(slots[(int) (seq & mask)]);
+    } finally {
+      publish(seq);
+    }
+    return true;
+  }
+
+  public <C> boolean tryWrite(final C context, final BiConsumer<? super C, ? super T> filler) {
+    final long seq = claim();
+    if (seq < 0L) return false;
+    try {
+      filler.accept(context, slots[(int) (seq & mask)]);
+    } finally {
+      publish(seq);
+    }
+    return true;
+  }
+
+  public <C1, C2> boolean tryWrite(
+      final C1 context1,
+      final C2 context2,
+      final TriConsumer<? super C1, ? super C2, ? super T> filler) {
+    final long seq = claim();
+    if (seq < 0L) return false;
+    try {
+      filler.accept(context1, context2, slots[(int) (seq & mask)]);
+    } finally {
+      publish(seq);
+    }
+    return true;
+  }
+
+  /** Drains all currently-available slots. Returns the count processed. */
+  public int drain(final Consumer<? super T> handler) {
+    long cursor = consumerCursor;
+    int count = 0;
+    while (true) {
+      final long nextSeq = cursor + 1L;
+      final int idx = (int) (nextSeq & mask);
+      if (publishedSequences.get(idx * CACHE_LINE_LONGS) != nextSeq) break;
+      handler.accept(slots[idx]);
+      cursor = nextSeq;
+      count++;
+    }
+    if (count > 0) consumerCursor = cursor;
+    return count;
+  }
+
+  public <C> int drain(final C context, final BiConsumer<? super C, ? super T> handler) {
+    long cursor = consumerCursor;
+    int count = 0;
+    while (true) {
+      final long nextSeq = cursor + 1L;
+      final int idx = (int) (nextSeq & mask);
+      if (publishedSequences.get(idx * CACHE_LINE_LONGS) != nextSeq) break;
+      handler.accept(context, slots[idx]);
+      cursor = nextSeq;
+      count++;
+    }
+    if (count > 0) consumerCursor = cursor;
+    return count;
+  }
+
+  public <C1, C2> int drain(
+      final C1 context1,
+      final C2 context2,
+      final TriConsumer<? super C1, ? super C2, ? super T> handler) {
+    long cursor = consumerCursor;
+    int count = 0;
+    while (true) {
+      final long nextSeq = cursor + 1L;
+      final int idx = (int) (nextSeq & mask);
+      if (publishedSequences.get(idx * CACHE_LINE_LONGS) != nextSeq) break;
+      handler.accept(context1, context2, slots[idx]);
+      cursor = nextSeq;
+      count++;
+    }
+    if (count > 0) consumerCursor = cursor;
+    return count;
+  }
+
+  /**
+   * Try to claim a contiguous range of {@code n} sequences in a single CAS. Returns {@code null} if
+   * the ring doesn't have room for the whole batch -- the caller treats that as "drop all {@code
+   * n}", which is the natural shape for callers that batch by a higher-level unit (e.g. one CSS
+   * publish per completed trace). When the caller has a list of N items to write, this amortizes
+   * producer-cursor contention from O(N) CASes to O(1) per call.
+   *
+   * <p>The returned {@link Batch} must be filled via {@link Batch#fillAndPublish} exactly {@code n}
+   * times. Under-publishing leaves the ring stuck at the unfilled sequence -- the consumer waits
+   * there forever. Over-publishing throws {@link IllegalStateException}.
+   *
+   * @throws IllegalArgumentException if {@code n &lt; 1}
+   */
+  public Batch tryClaim(final int n) {
+    if (n < 1) {
+      throw new IllegalArgumentException("n must be >= 1, got " + n);
+    }
+    while (true) {
+      final long current = producerCursor;
+      // Stale read of consumerCursor is fine: a false "full" reading just causes a drop, and a
+      // real one is correctly identified because consumerCursor only advances.
+      final long consumed = consumerCursor;
+      final long next = current + n;
+      if (next - consumed > capacity) {
+        return null;
+      }
+      if (PRODUCER_CURSOR.compareAndSet(this, current, next)) {
+        // Claimed sequences [current + 1, next] inclusive (== n sequences total).
+        return new Batch(current + 1L, n);
+      }
+      // CAS failure -> another producer claimed; retry.
+    }
+  }
+
+  /** CAS-claim the next sequence, or return {@code -1} if the ring is full. */
+  private long claim() {
+    while (true) {
+      final long current = producerCursor;
+      // Stale read of consumerCursor is fine: a false "full" reading just causes a drop, and
+      // a real "full" reading is correctly identified because consumerCursor only advances.
+      final long consumed = consumerCursor;
+      if (current - consumed >= capacity) {
+        return -1L;
+      }
+      final long next = current + 1L;
+      if (PRODUCER_CURSOR.compareAndSet(this, current, next)) {
+        return next;
+      }
+      // CAS failure -> another producer claimed; retry.
+    }
+  }
+
+  /**
+   * Handle returned by {@link MpscRingBuffer#tryClaim}. Holds a contiguous range of pre-claimed
+   * sequences belonging to the producer thread that called {@code tryClaim}; the caller must fill
+   * and publish each via {@link #fillAndPublish}.
+   *
+   * <p><b>Not thread-safe</b> -- the producer thread owns it for the lifetime of the call. Do not
+   * share across threads.
+   */
+  public final class Batch {
+    private final long startSeq;
+    private final int size;
+    private int published;
+
+    Batch(final long startSeq, final int size) {
+      this.startSeq = startSeq;
+      this.size = size;
+    }
+
+    /** Total slots in this batch (the {@code n} passed to {@code tryClaim}). */
+    public int size() {
+      return size;
+    }
+
+    /** Slots not yet filled. */
+    public int remaining() {
+      return size - published;
+    }
+
+    public void fillAndPublish(final Consumer<? super T> filler) {
+      final long seq = nextSeq();
+      final int idx = (int) (seq & mask);
+      try {
+        filler.accept(slots[idx]);
+      } finally {
+        publishedSequences.set(idx * CACHE_LINE_LONGS, seq);
+      }
+    }
+
+    public <C> void fillAndPublish(final C context, final BiConsumer<? super C, ? super T> filler) {
+      final long seq = nextSeq();
+      final int idx = (int) (seq & mask);
+      try {
+        filler.accept(context, slots[idx]);
+      } finally {
+        publishedSequences.set(idx * CACHE_LINE_LONGS, seq);
+      }
+    }
+
+    public <C1, C2> void fillAndPublish(
+        final C1 context1,
+        final C2 context2,
+        final TriConsumer<? super C1, ? super C2, ? super T> filler) {
+      final long seq = nextSeq();
+      final int idx = (int) (seq & mask);
+      try {
+        filler.accept(context1, context2, slots[idx]);
+      } finally {
+        publishedSequences.set(idx * CACHE_LINE_LONGS, seq);
+      }
+    }
+
+    private long nextSeq() {
+      if (published >= size) {
+        throw new IllegalStateException(
+            "Batch over-published: size=" + size + " published=" + published);
+      }
+      final long seq = startSeq + published;
+      published++;
+      return seq;
+    }
+  }
+
+  // ============ Low-level primitives ============
+  //
+  // These three methods (tryClaimRange / slotAt / publish) expose the producer-side machinery
+  // directly. Callers manage the claimed sequence range themselves -- no per-call handle
+  // allocated, no callback dispatched. This is intended for hot paths where the higher-level
+  // tryWrite/tryClaim+Batch APIs would allocate per call (Iterator on the iterable being
+  // batched, or the Batch object itself) that escape analysis doesn't eliminate.
+  //
+  // Misuse hazards: every claimed sequence MUST be published exactly once; sequences must be
+  // published in [start, start+n) range only; the slot returned by slotAt() must not escape past
+  // the next publish() of any sequence (the producer "owns" the slot until publish).
+
+  /**
+   * Try to claim a contiguous range of {@code n} sequences in a single CAS. Returns the
+   * <i>start</i> sequence on success (a non-negative long) or {@code -1L} if the ring doesn't have
+   * room for the whole batch. The caller must publish each sequence in {@code [start, start + n)}
+   * exactly once via {@link #publish(long)}.
+   *
+   * @throws IllegalArgumentException if {@code n &lt; 1}
+   */
+  public long tryClaimRange(final int n) {
+    if (n < 1) {
+      throw new IllegalArgumentException("n must be >= 1, got " + n);
+    }
+    while (true) {
+      final long current = producerCursor;
+      final long consumed = consumerCursor;
+      final long next = current + n;
+      if (next - consumed > capacity) {
+        return -1L;
+      }
+      if (PRODUCER_CURSOR.compareAndSet(this, current, next)) {
+        return current + 1L;
+      }
+    }
+  }
+
+  /**
+   * Slot for sequence {@code seq}. Only safe when {@code seq} is in a range currently owned by the
+   * caller (claimed via {@link #tryClaimRange} and not yet published).
+   */
+  public T slotAt(final long seq) {
+    return slots[(int) (seq & mask)];
+  }
+
+  /**
+   * Publish sequence {@code seq}, making the slot at {@code slotAt(seq)} visible to the consumer.
+   * Release semantics via {@link AtomicLongArray#set}.
+   */
+  public void publish(final long seq) {
+    publishedSequences.set(((int) (seq & mask)) * CACHE_LINE_LONGS, seq);
+  }
+
+  private static int nextPowerOfTwo(final int n) {
+    if (n <= 1) return 1;
+    // 32 - leadingZeros(n-1) gives the number of bits needed to represent n-1; shifting 1 by that
+    // gives the smallest power of two >= n.
+    final int bits = 32 - Integer.numberOfLeadingZeros(n - 1);
+    return 1 << bits;
+  }
+}
diff --git a/internal-api/src/test/java/datadog/trace/util/HashtableTest.java b/internal-api/src/test/java/datadog/trace/util/HashtableTest.java
index 2992279be6d..953453ca3aa 100644
--- a/internal-api/src/test/java/datadog/trace/util/HashtableTest.java
+++ b/internal-api/src/test/java/datadog/trace/util/HashtableTest.java
@@ -349,5 +349,64 @@ void removeTwiceWithoutInterveningNextThrows() {
       it.remove();
       assertThrows(IllegalStateException.class, it::remove);
     }
+
+    @Test
+    void halfOpenRangeOmitsBucketsOutsideTheRange() {
+      // CollidingKey lets us pin entries to specific buckets via controlled hashCode. 16-slot
+      // table -> bucketIndex = hash & 15. Place entries in buckets 0, 5, and 10; iterate
+      // [5, 10) -- should see only bucket 5.
+      Hashtable.D1<CollidingKey, CollidingKeyEntry> table = new Hashtable.D1<>(16);
+      table.insert(new CollidingKeyEntry(new CollidingKey("b0", 0), 1));
+      table.insert(new CollidingKeyEntry(new CollidingKey("b5", 5), 2));
+      table.insert(new CollidingKeyEntry(new CollidingKey("b10", 10), 3));
+
+      Set<String> seen = new HashSet<>();
+      for (MutatingTableIterator<CollidingKeyEntry> it =
+              Support.mutatingTableIterator(table.buckets, 5, 10);
+          it.hasNext(); ) {
+        seen.add(it.next().key.label);
+      }
+      assertEquals(1, seen.size());
+      assertTrue(seen.contains("b5"));
+    }
+
+    @Test
+    void emptyHalfOpenRangeIsExhausted() {
+      // start == end -> immediately-exhausted iterator. Important: this is the wrap-around
+      // pass [0, cursor) when cursor == 0 in resumable sweeps.
+      Hashtable.D1<String, StringIntEntry> table = new Hashtable.D1<>(8);
+      table.insert(new StringIntEntry("a", 1));
+      MutatingTableIterator<StringIntEntry> it = Support.mutatingTableIterator(table.buckets, 0, 0);
+      assertFalse(it.hasNext());
+    }
+
+    @Test
+    void rangeBoundsOutOfOrderThrows() {
+      Hashtable.D1<String, StringIntEntry> table = new Hashtable.D1<>(8);
+      assertThrows(
+          IndexOutOfBoundsException.class,
+          () -> Support.mutatingTableIterator(table.buckets, -1, 4));
+      assertThrows(
+          IndexOutOfBoundsException.class,
+          () -> Support.mutatingTableIterator(table.buckets, 4, 2)); // end < start
+      assertThrows(
+          IndexOutOfBoundsException.class,
+          () ->
+              Support.mutatingTableIterator(
+                  table.buckets, 0, table.buckets.length + 1)); // end > len
+    }
+
+    @Test
+    void currentBucketReportsLandingIndex() {
+      // Pin one entry to a known bucket and check currentBucket() after next() reports that
+      // bucket. Before any next() (or after remove()), currentBucket() returns -1.
+      Hashtable.D1<CollidingKey, CollidingKeyEntry> table = new Hashtable.D1<>(16);
+      table.insert(new CollidingKeyEntry(new CollidingKey("b3", 3), 1));
+
+      MutatingTableIterator<CollidingKeyEntry> it = Support.mutatingTableIterator(table.buckets);
+      assertEquals(-1, it.currentBucket(), "before any next() currentBucket should be -1");
+      it.next();
+      assertEquals(3, it.currentBucket(), "currentBucket should report the entry's bucket");
+    }
   }
 }
diff --git a/internal-api/src/test/java/datadog/trace/util/concurrent/MpscRingBufferTest.java b/internal-api/src/test/java/datadog/trace/util/concurrent/MpscRingBufferTest.java
new file mode 100644
index 00000000000..6aeb635abd0
--- /dev/null
+++ b/internal-api/src/test/java/datadog/trace/util/concurrent/MpscRingBufferTest.java
@@ -0,0 +1,498 @@
+package datadog.trace.util.concurrent;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import datadog.trace.api.function.TriConsumer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiConsumer;
+import org.junit.jupiter.api.Test;
+
+class MpscRingBufferTest {
+
+  /** Mutable slot used by the tests; replaces the per-publish allocation a real consumer avoids. */
+  static final class Slot {
+    int value;
+    String tag;
+  }
+
+  // ============ Construction ============
+
+  @Test
+  void capacityRoundsUpToPowerOfTwo() {
+    assertEquals(1, new MpscRingBuffer<>(Slot::new, 1).capacity());
+    assertEquals(2, new MpscRingBuffer<>(Slot::new, 2).capacity());
+    assertEquals(4, new MpscRingBuffer<>(Slot::new, 3).capacity());
+    assertEquals(16, new MpscRingBuffer<>(Slot::new, 10).capacity());
+    assertEquals(1024, new MpscRingBuffer<>(Slot::new, 1024).capacity());
+    assertEquals(2048, new MpscRingBuffer<>(Slot::new, 1025).capacity());
+  }
+
+  @Test
+  void rejectsNonPositiveCapacityHint() {
+    assertThrows(IllegalArgumentException.class, () -> new MpscRingBuffer<>(Slot::new, 0));
+    assertThrows(IllegalArgumentException.class, () -> new MpscRingBuffer<>(Slot::new, -1));
+  }
+
+  @Test
+  void slotsArePreAllocatedFromFactory() {
+    AtomicInteger calls = new AtomicInteger();
+    MpscRingBuffer<Slot> ring =
+        new MpscRingBuffer<>(
+            () -> {
+              calls.incrementAndGet();
+              return new Slot();
+            },
+            8);
+    assertEquals(8, ring.capacity());
+    assertEquals(8, calls.get(), "factory must run capacity times during construction");
+  }
+
+  // ============ Basic produce / consume ============
+
+  @Test
+  void emptyRingIsEmpty() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertTrue(ring.isEmpty());
+    assertEquals(0, ring.size());
+    assertEquals(0, ring.drain(s -> {}));
+  }
+
+  @Test
+  void singleWriteThenDrain() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertTrue(
+        ring.tryWrite(
+            s -> {
+              s.value = 42;
+              s.tag = "hello";
+            }));
+    assertEquals(1, ring.size());
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(1, drained);
+    assertEquals(Arrays.asList(42), seen);
+    assertTrue(ring.isEmpty());
+  }
+
+  @Test
+  void writesPreserveFIFOOrder() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    for (int i = 0; i < 6; i++) {
+      final int v = i;
+      assertTrue(ring.tryWrite(s -> s.value = v));
+    }
+    List<Integer> seen = new ArrayList<>();
+    ring.drain(s -> seen.add(s.value));
+    assertEquals(Arrays.asList(0, 1, 2, 3, 4, 5), seen);
+  }
+
+  // ============ Full / drop behavior ============
+
+  @Test
+  void writesBeyondCapacityFail() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    for (int i = 0; i < 4; i++) {
+      assertTrue(ring.tryWrite(s -> s.value = 0));
+    }
+    assertFalse(ring.tryWrite(s -> s.value = 0), "5th write must fail when capacity == 4");
+    assertEquals(4, ring.size());
+  }
+
+  @Test
+  void writesResumeAfterDrain() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    for (int i = 0; i < 4; i++) {
+      final int v = i;
+      assertTrue(ring.tryWrite(s -> s.value = v));
+    }
+    assertFalse(ring.tryWrite(s -> {}));
+
+    AtomicInteger sum = new AtomicInteger();
+    ring.drain(s -> sum.addAndGet(s.value));
+    assertEquals(0 + 1 + 2 + 3, sum.get());
+
+    // Now should accept another full round.
+    for (int i = 100; i < 104; i++) {
+      final int v = i;
+      assertTrue(ring.tryWrite(s -> s.value = v));
+    }
+    assertFalse(ring.tryWrite(s -> {}));
+  }
+
+  // ============ Context-passing variants (context first, slot last) ============
+
+  @Test
+  void writeAndDrainWithOneContext() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    BiConsumer<Integer, Slot> filler = (ctx, s) -> s.value = ctx;
+    assertTrue(ring.tryWrite(7, filler));
+    assertTrue(ring.tryWrite(8, filler));
+
+    List<Integer> seen = new ArrayList<>();
+    BiConsumer<List<Integer>, Slot> reader = (sink, s) -> sink.add(s.value);
+    assertEquals(2, ring.drain(seen, reader));
+    assertEquals(Arrays.asList(7, 8), seen);
+  }
+
+  @Test
+  void writeAndDrainWithTwoContexts() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    TriConsumer<Integer, String, Slot> filler =
+        (v, t, s) -> {
+          s.value = v;
+          s.tag = t;
+        };
+    assertTrue(ring.tryWrite(1, "a", filler));
+    assertTrue(ring.tryWrite(2, "b", filler));
+
+    List<String> tags = new ArrayList<>();
+    List<Integer> vals = new ArrayList<>();
+    TriConsumer<List<String>, List<Integer>, Slot> reader =
+        (ts, vs, s) -> {
+          ts.add(s.tag);
+          vs.add(s.value);
+        };
+    assertEquals(2, ring.drain(tags, vals, reader));
+    assertEquals(Arrays.asList("a", "b"), tags);
+    assertEquals(Arrays.asList(1, 2), vals);
+  }
+
+  // ============ Concurrency ============
+
+  @Test
+  void manyProducersSingleConsumerSeesEveryWrittenValue() throws InterruptedException {
+    final int producers = 8;
+    final int perProducer = 50_000;
+    final int total = producers * perProducer;
+
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 1024);
+
+    ExecutorService producerPool = Executors.newFixedThreadPool(producers);
+    AtomicInteger dropped = new AtomicInteger();
+    AtomicInteger written = new AtomicInteger();
+    CountDownLatch start = new CountDownLatch(1);
+
+    BiConsumer<Integer, Slot> filler = (v, s) -> s.value = v;
+
+    for (int p = 0; p < producers; p++) {
+      final int base = p * perProducer;
+      producerPool.submit(
+          () -> {
+            try {
+              start.await();
+            } catch (InterruptedException ie) {
+              Thread.currentThread().interrupt();
+              return;
+            }
+            for (int i = 0; i < perProducer; i++) {
+              int v = base + i;
+              // Spin until the consumer makes room. We're testing correctness, not throughput.
+              while (!ring.tryWrite(v, filler)) {
+                dropped.incrementAndGet();
+                Thread.yield();
+              }
+              written.incrementAndGet();
+            }
+          });
+    }
+
+    Set<Integer> seen = new HashSet<>(total);
+    BiConsumer<Set<Integer>, Slot> reader = (sink, s) -> sink.add(s.value);
+
+    Thread consumer =
+        new Thread(
+            () -> {
+              while (seen.size() < total) {
+                if (ring.drain(seen, reader) == 0) Thread.yield();
+              }
+            },
+            "ring-consumer");
+    consumer.start();
+
+    start.countDown();
+    producerPool.shutdown();
+    assertTrue(producerPool.awaitTermination(30, TimeUnit.SECONDS), "producers timed out");
+    assertTrue(consumer.isAlive() || seen.size() == total);
+    consumer.join(30_000);
+    assertFalse(consumer.isAlive(), "consumer timed out");
+
+    assertEquals(total, written.get(), "every producer call should eventually succeed");
+    assertEquals(total, seen.size(), "consumer must see every value exactly once");
+  }
+
+  @Test
+  void sizeReflectsOutstandingWrites() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    assertEquals(0, ring.size());
+    ring.tryWrite(s -> {});
+    assertEquals(1, ring.size());
+    ring.tryWrite(s -> {});
+    assertEquals(2, ring.size());
+    ring.drain(s -> {});
+    assertEquals(0, ring.size());
+  }
+
+  @Test
+  void throwingFillerStillPublishesSoConsumerDoesntHang() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+
+    // First write succeeds.
+    assertTrue(ring.tryWrite(s -> s.value = 1));
+
+    // Second write throws midway through filling. Slot must still be published so the consumer's
+    // drain can advance past it.
+    RuntimeException boom = new RuntimeException("boom");
+    RuntimeException thrown =
+        assertThrows(
+            RuntimeException.class,
+            () ->
+                ring.tryWrite(
+                    s -> {
+                      s.value = 2;
+                      throw boom;
+                    }));
+    assertSame(boom, thrown);
+
+    // Third write proves the ring's cursors are still healthy after the throw.
+    assertTrue(ring.tryWrite(s -> s.value = 3));
+
+    // Consumer drains all three: it must not hang on the partially-filled slot.
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(3, drained, "consumer must advance past the throwing slot");
+    assertEquals(Arrays.asList(1, 2, 3), seen, "throwing slot keeps whatever filler had written");
+  }
+
+  // ============ Batch claim (tryClaim) ============
+
+  @Test
+  void tryClaimReturnsBatchOfRequestedSize() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(3);
+
+    assertNotNull(batch);
+    assertEquals(3, batch.size());
+    assertEquals(3, batch.remaining());
+  }
+
+  @Test
+  void tryClaimRejectsZeroOrNegative() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaim(0));
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaim(-1));
+  }
+
+  @Test
+  void tryClaimReturnsNullWhenRingCantFitBatch() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertNotNull(ring.tryClaim(3));
+    // Only 1 slot left; claiming 2 must fail wholesale.
+    assertNull(ring.tryClaim(2), "all-or-nothing: partial batches are not allowed");
+    // But one more slot does fit.
+    assertNotNull(ring.tryClaim(1));
+    // Now full.
+    assertNull(ring.tryClaim(1));
+  }
+
+  @Test
+  void tryClaimFillAndPublishDeliversAllToDrain() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(5);
+
+    for (int i = 0; i < 5; i++) {
+      final int v = i;
+      batch.fillAndPublish(s -> s.value = v);
+    }
+    assertEquals(0, batch.remaining());
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(5, drained);
+    assertEquals(Arrays.asList(0, 1, 2, 3, 4), seen, "batch publishes in order");
+  }
+
+  @Test
+  void overPublishingBatchThrows() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(2);
+
+    batch.fillAndPublish(s -> s.value = 1);
+    batch.fillAndPublish(s -> s.value = 2);
+    assertThrows(IllegalStateException.class, () -> batch.fillAndPublish(s -> s.value = 3));
+  }
+
+  @Test
+  void batchSupportsContextFillers() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(3);
+
+    BiConsumer<Integer, Slot> oneCtx = (v, s) -> s.value = v;
+    TriConsumer<Integer, String, Slot> twoCtx =
+        (v, t, s) -> {
+          s.value = v;
+          s.tag = t;
+        };
+
+    batch.fillAndPublish(s -> s.value = 1);
+    batch.fillAndPublish(2, oneCtx);
+    batch.fillAndPublish(3, "three", twoCtx);
+
+    List<String> seen = new ArrayList<>();
+    ring.drain(s -> seen.add(s.value + "/" + s.tag));
+    assertEquals(Arrays.asList("1/null", "2/null", "3/three"), seen);
+  }
+
+  @Test
+  void batchFillerThrowStillPublishesAndAdvances() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(3);
+
+    batch.fillAndPublish(s -> s.value = 1);
+    RuntimeException boom = new RuntimeException("boom");
+    assertThrows(
+        RuntimeException.class,
+        () ->
+            batch.fillAndPublish(
+                s -> {
+                  s.value = 2;
+                  throw boom;
+                }));
+    // The throwing slot's sequence has already been consumed; published counter advanced.
+    assertEquals(1, batch.remaining(), "throwing slot still counts as published");
+    batch.fillAndPublish(s -> s.value = 3);
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(3, drained);
+    assertEquals(Arrays.asList(1, 2, 3), seen);
+  }
+
+  // ============ Low-level primitives (tryClaimRange / slotAt / publish) ============
+
+  @Test
+  void tryClaimRangeReturnsStartSequence() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    long start1 = ring.tryClaimRange(3);
+    long start2 = ring.tryClaimRange(2);
+
+    assertEquals(0L, start1, "first range starts at sequence 0");
+    assertEquals(3L, start2, "second range begins immediately after the first");
+  }
+
+  @Test
+  void tryClaimRangeRejectsZeroOrNegative() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaimRange(0));
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaimRange(-1));
+  }
+
+  @Test
+  void tryClaimRangeReturnsMinusOneWhenFull() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertTrue(ring.tryClaimRange(3) >= 0);
+    assertEquals(-1L, ring.tryClaimRange(2), "all-or-nothing");
+    assertTrue(ring.tryClaimRange(1) >= 0);
+    assertEquals(-1L, ring.tryClaimRange(1));
+  }
+
+  @Test
+  void slotAtAndPublishRoundTrip() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    long start = ring.tryClaimRange(3);
+    assertTrue(start >= 0);
+
+    for (int i = 0; i < 3; i++) {
+      long seq = start + i;
+      Slot slot = ring.slotAt(seq);
+      slot.value = (int) (seq + 100);
+      ring.publish(seq);
+    }
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(3, drained);
+    assertEquals(Arrays.asList(100, 101, 102), seen);
+  }
+
+  @Test
+  void slotAtReturnsSameInstanceForSameModuloPosition() {
+    // After publish+drain wraps around, the slot at sequence N and sequence N+capacity are the
+    // same physical object (this is the whole point of the ring).
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    Slot firstSlot = ring.slotAt(0L);
+    Slot wrappedSlot = ring.slotAt(4L); // 4 & mask(3) == 0
+    assertSame(firstSlot, wrappedSlot);
+  }
+
+  @Test
+  void concurrentBatchClaimsAreOrderedAndDontInterleave() throws InterruptedException {
+    final int producers = 8;
+    final int batchesPerProducer = 200;
+    final int batchSize = 16;
+    final int total = producers * batchesPerProducer * batchSize;
+
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 256);
+    ExecutorService pool = Executors.newFixedThreadPool(producers);
+    AtomicInteger writes = new AtomicInteger();
+    CountDownLatch start = new CountDownLatch(1);
+
+    for (int p = 0; p < producers; p++) {
+      final int base = p * batchesPerProducer * batchSize;
+      pool.submit(
+          () -> {
+            try {
+              start.await();
+            } catch (InterruptedException ie) {
+              Thread.currentThread().interrupt();
+              return;
+            }
+            for (int b = 0; b < batchesPerProducer; b++) {
+              MpscRingBuffer<Slot>.Batch batch;
+              while ((batch = ring.tryClaim(batchSize)) == null) {
+                Thread.yield();
+              }
+              for (int i = 0; i < batchSize; i++) {
+                final int v = base + b * batchSize + i;
+                batch.fillAndPublish(s -> s.value = v);
+              }
+              writes.addAndGet(batchSize);
+            }
+          });
+    }
+
+    Set<Integer> seen = new HashSet<>(total);
+    Thread consumer =
+        new Thread(
+            () -> {
+              while (seen.size() < total) {
+                if (ring.drain((Slot s) -> seen.add(s.value)) == 0) Thread.yield();
+              }
+            },
+            "ring-batch-consumer");
+    consumer.start();
+
+    start.countDown();
+    pool.shutdown();
+    assertTrue(pool.awaitTermination(30, TimeUnit.SECONDS), "producers timed out");
+    consumer.join(30_000);
+    assertFalse(consumer.isAlive(), "consumer timed out");
+    assertEquals(total, writes.get());
+    assertEquals(total, seen.size(), "consumer must see every value exactly once");
+  }
+}